Linux
1
Linux
http://www.ilinuxkernel.com
http://www.ilinuxkernel.com/
Linux
2
1 ........................................ 4
2 ........................................ 5
2.1 Sectors .............................. 5
2.2 Blocks ............................... 6
2.3 Segments ............................. 6
3 ........................................ 8
3.1 ...................................... 8
3.2 bio .................................. 12
3.3 bio .................................. 15
4 ........................................ 16
4.1 ...................................... 16
4.2 ...................................... 19
4.2.1 .................................... 19
4.2.2 ll_rw_block ........................ 20
4.2.3 submit_bh .......................... 22
4.2.4 generic_make_request ............... 24
4.2.5 __generic_make_request ............. 26
Linux
3
1 LinuxI/O ........................................................................................................ 4
2 ................................................................................................................ 7
3 .................................................................................................... 10
4 biobio_vecpage .................................................................... 14
Linux
4
1
1
read
1 LinuxI/O
1readVFSVirtual Filesystem Switch
2VFS
3
Mapping Layer
4Generic Block LayerI/O
Virtual Filesystem (VFS) Layer
Generic Block Layer
Buffer Cache (Page Cache)
I/O Scheduler Layer
Block Device Driver Block Device Driver
Hard Disk SSD
Direct IO
Request Queue
Request Queue
Kernel Space
Storage Media
Mapping Layer Disk Filesystem
Disk Filesystem
Linux
5
5I/OI/O Scheduler Layer
I/O
6Block Device Driver
12LinuxVFSVirtual Filesystem
Switchsys_readsys_write
VFS3Mapping Layer
4
2
chunk
CD-ROMSSD
-
2.1 Sectors
2512
512CD-ROM2k
512
Linux
6
sector indices3264
sector_t
2.2 Blocks
VFS
block
inode
2
2
5121K4K
block
2.3 Segments
I/O
DMA
DMA
DMA/
DMA
/scatter-gatherDMA
/DMA
Linux
7
/DMAsegments
/DMA
generic block layer
physical segment
2
Page
Block Buffer
Sector
Sector
Block Buffer
Sector
Sector
Block Buffer
Sector
Sector
Block Buffer
Sector
Sector
Segment
512B
512B
512B
512B
512B
512B
512B
512B
1KB
1KB
1KB
1KB
3KB
4KB
2
sectorI/O
mapping layer
block
Linux
8
/DMA
1
pagepage frame
4KB4KRAM4K
3
Generic Block Layer
CPU
zero-copy
I/O
LVMLogical Volume ManagerRAIDRedundant Array of
Inexpensive Disks
DMAI/O
3.1
Linux
9
block buffer
buffer headbuffer_head
buffer_head
include/linux/buffer_head.h
00061: struct buffer_head {
00062:     unsigned long b_state;              /* buffer state bitmap (see above) */
00063:     struct buffer_head *b_this_page;    /* circular list of page's buffers */
00064:     struct page *b_page;                /* the page this bh is mapped to */
00065:
00066:     sector_t b_blocknr;                 /* start block number */
00067:     size_t b_size;                      /* size of mapping */
00068:     char *b_data;                       /* pointer to data within the page */
00069:
00070:     struct block_device *b_bdev;
00071:     bh_end_io_t *b_end_io;              /* I/O completion */
00072:     void *b_private;                    /* reserved for b_end_io */
00073:     struct list_head b_assoc_buffers;   /* associated with another mapping */
00074:     struct address_space *b_assoc_map;  /* mapping this buffer is
00075:                                            associated with */
00076:     atomic_t b_count;                   /* users using this buffer_head */
00077: };
b_state
b_this_page
b_page
b_blocknr
b_size
b_data
b_bdev
b_end_ioI/O
b_privateI/O
b_assoc_buffers
b_assoc_mapbufferaddress_space
Linux
10
b_count
3
b_state1
bh_state_bitsinclude/linux/buffer_head.h
00019: enum bh_state_bits {
00020:     BH_Uptodate,      /* Contains valid data */
00021:     BH_Dirty,         /* Is dirty */
00022:     BH_Lock,          /* Is locked */
00023:     BH_Req,           /* Has been submitted for I/O */
00024:     BH_Uptodate_Lock, /* Used by the first bh in a page, to serialise
00025:                        * IO completion of other buffers in the page
00026:                        */
00027:
00028:     BH_Mapped,        /* Has a disk mapping */
00029:     BH_New,           /* Disk mapping was newly created by get_block */
00030:     BH_Async_Read,    /* Is under end_buffer_async_read I/O */
00031:     BH_Async_Write,   /* Is under end_buffer_async_write I/O */
00032:     BH_Delay,         /* Buffer is not yet allocated on disk */
00033:     BH_Boundary,      /* Block is followed by a discontiguity */
00034:     BH_Write_EIO,     /* I/O error on write */
00035:     BH_Ordered,       /* DEPRECATED: ordered write */
00036:     BH_Eopnotsupp,    /* DEPRECATED: operation not supported (barrier) */
00037:     BH_Unwritten,     /* Buffer is allocated on disk but not written */
00038:     BH_Quiet,         /* Buffer Error Prinks to be quiet */
00039:
00040:     BH_PrivateStart,  /* not a state bit, but the first bit available
00041:                        * for private allocation by other entities
00042:                        */
00043: };
1 bh_state
BH_Uptodate buffer BH_Dirty buffer
BH_Lock bufferI/O
Linux
11
BH_Req bufferI/O
BH_Mapped buffer
BH_New bufferget_block
BH_Async_Read bufferend_buffer_async_read
BH_Async_Write bufferend_buffer_async_write
BH_Delay buffer
BH_Boundary
BH_Write_EIO buffer
BH_Ordered ordered
BH_Eopnotsupp
BH_Unwritten buffer
BH_Quiet buffer
bh_state_bitsBH_PrivateStart
I/OBH_PrivateStart
b_state
I/O
b_countget_bhput_bh
include/linux/buffer_head.h
00279: static inline void get_bh(struct buffer_head *bh)
00280: {
00281:     atomic_inc(&bh->b_count);
00282: }
00283:
00284: static inline void put_bh(struct buffer_head *bh)
00285: {
00286:     smp_mb__before_atomic_dec();
00287:     atomic_dec(&bh->b_count);
00288: }
00289:
get_bh
put_bh
b_blocknrb_bdev
b_pageb_data
Linux
12
b_pageb_size
b_datab_data+b_size
3.2 bio
I/Obioinclude/linux/bio.h
segmentI/O
bioI/OI/O
-I/O
bio
00060: /*
00061:  * main unit of I/O for the block layer and lower layers (ie drivers and
00062:  * stacking drivers)
00063:  */
00064: struct bio {
00065:     sector_t bi_sector;             /* device address in 512 byte
00066:                                        sectors */
00067:     struct bio *bi_next;            /* request queue link */
00068:     struct block_device *bi_bdev;
00069:     unsigned long bi_flags;         /* status, command, etc */
00070:     unsigned long bi_rw;            /* bottom bits READ/WRITE,
00071:                                      * top bits priority
00072:                                      */
00073:
00074:     unsigned short bi_vcnt;         /* how many bio_vec's */
00075:     unsigned short bi_idx;          /* current index into bvl_vec */
00076:
00077:     /* Number of segments in this BIO after
00078:      * physical address coalescing is performed.
00079:      */
00080:     unsigned int bi_phys_segments;
00081:
00082:     unsigned int bi_size;           /* residual I/O count */
00083:
00084:     /*
00085:      * To keep track of the max segment size, we account for the
00086:      * sizes of the first and last mergeable segments in this bio.
00087:      */
00088:     unsigned int bi_seg_front_size;
00089:     unsigned int bi_seg_back_size;
00090:
00091:     unsigned int bi_max_vecs;       /* max bvl_vecs we can hold */
00092:
00093:     unsigned int bi_comp_cpu;       /* completion CPU */
00094:
00095:     atomic_t bi_cnt;                /* pin count */
00096:
00097:     struct bio_vec *bi_io_vec;      /* the actual vec list */
00098:
Linux
13
00099: bio_end_io_t *bi_end_io; 00100: 00101: void *bi_private; 00102: #if defined(CONFIG_BLK_DEV_INTEGRITY) 00103: struct bio_integrity_payload *bi_integrity; / * data integrity */ 00104: #endif 00105: 00106: bio_destructor_t *bi_destructor; / * destructor */ 00107: 00108: / * 00109: * We can inline a number of vecs at the end of the bio, to avoid 00110: * double allocations for a small number of bio_vecs. This member 00111: * MUST obviously be kept at the very end of