块设备子系统是Linux中一个很重要的模块,在inode中有个字段是block_device类型的,它指向一个块设备,但是注意这里的“块设备”和设备驱动里面的块设备的差别是非常大的,这里的块设备是一个比较高层的概念,而最底层的块设备在文件系统还触及不到。
还是按照往常的方式来分析一下代码:首先看一下
/*
 * Bus type for the IDE subsystem.  ide_drive_t devices and ide_driver_t
 * drivers both hang off this bus; when a new device or driver registers,
 * the driver core runs .match and, on success, calls .probe
 * (generic_ide_probe below).
 */
struct bus_type ide_bus_type = {
	.name		= "ide",
	.match		= ide_bus_match,		/* does this driver fit this device? */
	.uevent		= ide_uevent,			/* hotplug event environment */
	.probe		= generic_ide_probe,		/* bind a matched device/driver pair */
	.remove		= generic_ide_remove,
	.shutdown	= generic_ide_shutdown,
	.dev_attrs	= ide_dev_attrs,		/* sysfs attributes for each device */
	.suspend	= generic_ide_suspend,
	.resume		= generic_ide_resume,
};
如果新加入一个设备或者驱动的时候就要进行枚举,这是很明了的了,一旦匹配成功则要调用总线或者驱动的Probe函数,这里是generic_ide_probe:
/*
 * Bus-level probe callback: convert the generic device/driver pair back
 * into their IDE-specific containers and delegate to the IDE driver's
 * own probe routine.  Returns -ENODEV when the driver has no probe hook.
 */
static int generic_ide_probe(struct device *dev)
{
	ide_drive_t *drive = to_ide_device(dev);
	ide_driver_t *drv = to_ide_driver(dev->driver);

	if (!drv->probe)
		return -ENODEV;

	return drv->probe(drive);
}
下面再看一个结构就好办了:
/*
 * The IDE disk driver.  Its embedded gen_driver registers on the "ide"
 * bus, so the driver core can match it against ide_drive_t devices and
 * call ide_disk_probe for each disk found.
 */
static ide_driver_t idedisk_driver = {
	.gen_driver = {
		.owner		= THIS_MODULE,
		.name		= "ide-disk",
		.bus		= &ide_bus_type,
	},
	.probe			= ide_disk_probe,	/* probe */
	.remove			= ide_disk_remove,	/* remove */
	.shutdown		= ide_device_shutdown,	/* shutdown */
	.version		= IDEDISK_VERSION,
	.media			= ide_disk,		/* media type */
	.supports_dsc_overlap	= 0,
	.do_request		= ide_do_rw_disk,	/* request handler */
	.end_request		= ide_end_request,	/* request completion */
	.error			= __ide_error,
	.abort			= __ide_abort,
	.proc			= idedisk_proc,		/* /proc/ide entries */
};
下面就要调用ide_disk_probe函数了,看一下代码:
/*
 * Driver probe for one IDE disk: allocate the glue object and the
 * gendisk, wire drive <-> driver <-> gendisk together, configure the
 * disk (idedisk_setup) and publish it to the block layer via add_disk.
 *
 * Fixes vs. the quoted excerpt: the printk format had "/n" instead of
 * "\n", and the elided error labels referenced by the gotos are restored
 * (out_free_idkp frees the glue object; failed returns -ENODEV).
 */
static int ide_disk_probe(ide_drive_t *drive)
{
	struct ide_disk_obj *idkp;
	struct gendisk *g;

	/* strstr("foo", "") is non-NULL, so an empty driver_req matches */
	if (!strstr("ide-disk", drive->driver_req))
		goto failed;
	if (!drive->present)
		goto failed;
	if (drive->media != ide_disk)
		goto failed;

	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
	if (!idkp)
		goto failed;

	g = alloc_disk_node(1 << PARTN_BITS,
			hwif_to_node(drive->hwif));
	if (!g)
		goto out_free_idkp;

	ide_init_disk(g, drive);		/* major/minor, name, queue */

	ide_register_subdriver(drive, &idedisk_driver);

	kref_init(&idkp->kref);

	/* glue object ties drive, driver and gendisk together */
	idkp->drive = drive;
	idkp->driver = &idedisk_driver;
	idkp->disk = g;

	g->private_data = &idkp->driver;

	drive->driver_data = idkp;

	idedisk_setup(drive);
	if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
		printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n",
			drive->name, drive->head);
		drive->attach = 0;
	} else
		drive->attach = 1;

	g->minors = 1 << PARTN_BITS;
	strcpy(g->devfs_name, drive->devfs_name);
	g->driverfs_dev = &drive->gendev;
	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
	set_capacity(g, idedisk_capacity(drive));
	g->fops = &idedisk_ops;
	add_disk(g);			/* disk is now live */
	return 0;

out_free_idkp:
	kfree(idkp);
failed:
	return -ENODEV;
}
回想一下统一的设备模型,主要是device_driver和device在主持大局,那么块设备中当然也少不了这两个老大。其实在文件系统中是看不到它们的身影的,只有再往下才行,实际上它们就是:ide_driver_t和ide_drive_t。那么现在分析一下这个里面调用的几个函数:
/*
 * Glue object binding one low-level IDE device, the driver servicing
 * it and the block-layer gendisk the filesystem sees.  Lifetime is
 * managed with a kref.
 */
struct ide_disk_obj {
	ide_drive_t	*drive;		/* underlying IDE device */
	ide_driver_t	*driver;	/* driver bound to the drive */
	struct gendisk	*disk;		/* block-layer representation */
	struct kref	kref;		/* reference count */
};
先说一下这个粘合结构,就是它将底层的ide_drive_t设备和文件系统的gendisk联系了起来,那么底层drive由谁来驱动呢?当然是driver域了,这样它把设备和驱动也联系了起来
/*
 * Fill the gendisk fields that derive from the hardware interface:
 * device numbers, the "hdX" name, and the request queue that the drive
 * already owns (the queue belongs to the device, not the driver).
 */
void ide_init_disk(struct gendisk *disk, ide_drive_t *drive)
{
	ide_hwif_t *iface = drive->hwif;
	unsigned int unit_nr = (drive->select.all >> 4) & 1;

	disk->queue = drive->queue;
	disk->major = iface->major;
	disk->first_minor = unit_nr << PARTN_BITS;
	sprintf(disk->disk_name, "hd%c", 'a' + iface->index * MAX_DRIVES + unit_nr);
}
调用完之后这个函数,就要:
idkp->drive = drive;
idkp->driver = &idedisk_driver;
idkp->disk = g;
重要的是这个idedisk_driver,它是:
static ide_driver_t idedisk_driver = {
见上面的定义......
};
之后又有一个重量级的函数,idedisk_setup:
/*
 * Finish configuring a probed IDE disk: register /proc settings, select
 * LBA(28/48) addressing, size the request queue, compute the capacity
 * and a BIOS CHS geometry, print a summary, and enable the write cache
 * if the drive advertises one.
 *
 * Fix vs. the quoted excerpt: all printk format strings had "/n" where
 * the newline escape "\n" belongs.
 */
static void idedisk_setup (ide_drive_t *drive)
{
	struct hd_driveid *id = drive->id;
	unsigned long long capacity;

	idedisk_add_settings(drive);

	if (drive->id_read == 0)
		return;

	if (drive->removable) {
		/* door locking only for non-'WD' removable models */
		if (id->model[0] != 'W' || id->model[1] != 'D') {
			drive->doorlocking = 1;
		}
	}

	(void)set_lba_addressing(drive, 1);

	if (drive->addressing == 1) {
		/* LBA48: cap request size to what the interface can take */
		ide_hwif_t *hwif = HWIF(drive);
		int max_s = 2048;

		if (max_s > hwif->rqsize)
			max_s = hwif->rqsize;

		blk_queue_max_sectors(drive->queue, max_s);
	}

	printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name, drive->queue->max_sectors / 2);

	init_idedisk_capacity (drive);

	/* without LBA48, clamp capacity to 2^28 sectors (~137 GB) */
	if (drive->addressing == 0 && drive->capacity64 > 1ULL << 28) {
		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
		       "%llu sectors (%llu MB)\n",
		       drive->name, (unsigned long long)drive->capacity64,
		       sectors_to_MB(drive->capacity64));
		drive->capacity64 = 1ULL << 28;
	}

	if (drive->hwif->no_lba48_dma && drive->addressing) {
		if (drive->capacity64 > 1ULL << 28) {
			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode will be used for accessing sectors > %u\n",
			       drive->name, 1 << 28);
		} else
			drive->addressing = 0;
	}

	capacity = idedisk_capacity (drive);

	if (!drive->forced_geom) {
		if (idedisk_supports_lba48(drive->id)) {
			/* conventional fake geometry for big disks */
			drive->bios_sect = 63;
			drive->bios_head = 255;
		}

		if (drive->bios_sect && drive->bios_head) {
			unsigned int cap0 = capacity; /* truncate to 32 bits */
			unsigned int cylsz, cyl;

			if (cap0 != capacity)
				drive->bios_cyl = 65535;
			else {
				cylsz = drive->bios_sect * drive->bios_head;
				cyl = cap0 / cylsz;
				if (cyl > 65535)
					cyl = 65535;
				if (cyl > drive->bios_cyl)
					drive->bios_cyl = cyl;
			}
		}
	}
	printk(KERN_INFO "%s: %llu sectors (%llu MB)",
	       drive->name, capacity, sectors_to_MB(capacity));

	if (id->buf_size)
		printk (" w/%dKiB Cache", id->buf_size/2);

	printk(", CHS=%d/%d/%d",
	       drive->bios_cyl, drive->bios_head, drive->bios_sect);

	if (drive->using_dma)
		ide_dma_verbose(drive);

	printk("\n");

	/* write cache advertised and enabled? */
	if ((id->csfo & 1) || (id->cfs_enable_1 & (1 << 5)))
		drive->wcache = 1;

	write_cache(drive, 1);
}
这个函数主要就是建立完整的gendisk结构,以后就可以让block_device用了,而block_device让inode,address_space用,这样底层就和上层联系了起来。
这里又涉及到了硬件和软件的问题,表现为设备结构和驱动程序的问题,我一直都有个疑问,就是blk_init_queue到底在哪里调用,register_blkdev和XX_probe的调用关系是什么?其实理解了硬件设备结构和设备驱动的关系后就好理解了,设备结构比如:scsi_device或ide_drive等都是在硬件总线枚举的时候被初始化的,而其驱动则是在模块加载时初始化的,相应的,请求队列是属于硬件设备的而不是属于驱动的,所以请求队列是在设备建立的时候初始化的,这个时候不管有没有驱动都初始化请求队列,下面分析调用过程:
在总线枚举的时候,各个总线上都游离着一些设备结构,比如pci总线上可能就会有ide设备结构,调用如下:
ide_generic_init调用ideprobe_init
ideprobe_init-->|for (index = 0; index < MAX_HWIFS; ++index)
if (probe[index])
probe_hwif(&ide_hwifs[index])
|for (index = 0; index < MAX_HWIFS; ++index)
if (probe[index])
hwif_init(&ide_hwifs[index]);-->|register_blkdev(hwif->major, hwif->name)
|init_irq(hwif)-->|request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup)
|for (index = 0; index < MAX_DRIVES; ++index) {
ide_drive_t *drive = &hwif->drives[index];
if (!drive->present)
continue;
if (ide_init_queue(drive)) {这里初始化请求队列
printk(KERN_ERR "ide: failed to init %s\n",drive->name);
continue;
}
......
|for (index = 0; index < MAX_HWIFS; ++index) {
if (probe[index]) {
ide_hwif_t *hwif = &ide_hwifs[index];
int unit;
if (!hwif->present)
continue;
if (hwif->chipset == ide_unknown || hwif->chipset == ide_forced)
hwif->chipset = ide_generic;
for (unit = 0; unit < MAX_DRIVES; ++unit)
if (hwif->drives[unit].present)
device_register(&hwif->drives[unit].gendev);
}
}
在设备结构初始化了以后,那么什么时候加载驱动的呢?遍查代码发现是一个idedisk_init
/*
 * Module init: register the IDE disk driver with the driver core, which
 * then matches it against ide_drive_t devices already on the "ide" bus
 * and calls ide_disk_probe for each match.
 */
static int __init idedisk_init(void)
{
	int rc;

	rc = driver_register(&idedisk_driver.gen_driver);
	return rc;
}
进一步的driver_register就很熟悉了,马上就到了ide_disk_probe函数,与上面分析的接在了一起,通过简单的推理,可以证明idedisk_init肯定发生在请求队列初始化之后,因为在ide_disk_probe中直接就用到了ide_drive的队列,并将之赋给新创建的gendisk的请求队列。下面看看整个过程:
/*
 * Core IDE subsystem init: register the "ide" bus type, set up global
 * state and /proc entries, then probe legacy and "known" chipsets.
 *
 * Fix vs. the quoted excerpt: "/n" in the banner printk should be "\n";
 * the bare "......" elision is now an explicit comment so the excerpt
 * stays syntactically coherent.
 */
static int __init ide_init(void)
{
	printk(KERN_INFO "Uniform Multi-Platform E-IDE driver " REVISION "\n");
	devfs_mk_dir("ide");
	system_bus_speed = ide_system_bus_speed();

	bus_register(&ide_bus_type);	/* devices/drivers can now attach */

	init_ide_data();

#ifdef CONFIG_PROC_FS
	proc_ide_root = proc_mkdir("ide", NULL);
#endif

#ifdef CONFIG_BLK_DEV_ALI14XX
	if (probe_ali14xx)
		(void)ali14xx_init();
#endif
	/* ... further legacy chipset init hooks elided in this excerpt ... */

	initializing = 1;
	/* Probe for special PCI and other "known" interface chipsets. */
	probe_for_hwifs();
	initializing = 0;

#ifdef CONFIG_PROC_FS
	proc_ide_create();
#endif
	return 0;
}
此处只关心ali14xx_init()
/*
 * Detect and initialize the ALI M14xx chipset.  Returns 0 on success,
 * -ENODEV when the chip's base port cannot be found or probing fails.
 *
 * Fix vs. the quoted excerpt: "/n" -> "\n" in the error printk.
 */
int __init ali14xx_init(void)
{
	if (findPort()) {
		if (ali14xx_probe())
			return -ENODEV;
		return 0;
	}
	printk(KERN_ERR "ali14xx: not found.\n");
	return -ENODEV;
}
/*
 * Program the ALI M14xx registers and hand both channels (hwif and its
 * mate) to the generic probe path.  Returns 0 on success, 1 when chip
 * register initialization fails.
 *
 * Fix vs. the quoted excerpt: "/n" -> "\n" in both printk strings.
 */
static int __init ali14xx_probe(void)
{
	ide_hwif_t *hwif, *mate;

	printk(KERN_DEBUG "ali14xx: base=0x%03x, regOn=0x%02x.\n",
	       basePort, regOn);

	if (!initRegisters()) {
		printk(KERN_ERR "ali14xx: Chip initialization failed.\n");
		return 1;
	}

	/* primary and secondary channel, cross-linked via ->mate */
	hwif = &ide_hwifs[0];
	mate = &ide_hwifs[1];

	hwif->chipset = ide_ali14xx;
	hwif->tuneproc = &ali14xx_tune_drive;
	hwif->mate = mate;

	mate->chipset = ide_ali14xx;
	mate->tuneproc = &ali14xx_tune_drive;
	mate->mate = hwif;
	mate->channel = 1;

	probe_hwif_init(hwif);
	probe_hwif_init(mate);

	create_proc_ide_interfaces();

	return 0;
}
/*
 * Probe and initialize one IDE interface with no fixup callback; a thin
 * convenience wrapper around probe_hwif_init_with_fixup().
 */
int probe_hwif_init(ide_hwif_t *hwif)
{
	int ret;

	ret = probe_hwif_init_with_fixup(hwif, NULL);
	return ret;
}
/*
 * Probe an interface, apply an optional chipset fixup, run hwif_init
 * (which registers the blkdev major, the IRQ and the per-drive request
 * queues), then device_register each present drive so the driver core
 * gets a chance to bind a driver (e.g. idedisk_driver).
 * Returns 0 on success, -1 when hwif_init fails.
 *
 * Fix vs. the quoted excerpt: "/n" -> "\n" in the failure printk.
 */
int probe_hwif_init_with_fixup(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif))
{
	probe_hwif(hwif);

	if (fixup)
		fixup(hwif);

	if (!hwif_init(hwif)) {
		printk(KERN_INFO "%s: failed to initialize IDE interface\n",
			hwif->name);
		return -1;
	}

	if (hwif->present) {
		u16 unit = 0;
		for (unit = 0; unit < MAX_DRIVES; ++unit) {
			ide_drive_t *drive = &hwif->drives[unit];
			if (drive->present) {
				/* triggers bus match -> generic_ide_probe */
				device_register(&drive->gendev);
			}
		}
	}

	return 0;
}
这个函数里最重要的回调有3个:probe_hwif和hwif_init还有device_register,其中最后一个最终调用device_add,这样就有了一次probe的机会,probe_hwif首先看看设备的状态,然后hwif_init上面已经分析过了,只不过这里分析的是通过另一条路径到达hwif_init函数的,也就是说有两条内核调用路径都可以经历这一切。
又是一个设备驱动。这几天分析了这么多的设备驱动,感觉只有把握住系统的框架才能更好的理解代码,进一步才能更好的修改代码,虽然这些天分析的代码数量很大,但是都是在一个框架内的,从usb到pci,再到网卡驱动输入子系统,ide驱动,scsi驱动,都是这样的.
在ide_init里还有一个重要的路径:
ide_init--> probe_for_hwifs
/*
 * Probe for "known" interface chipsets: scan the PCI bus for IDE
 * controllers and call platform-specific probes (only the PMAC branch
 * is shown here; "......" marks parts elided from this excerpt).
 */
static void __init probe_for_hwifs (void)
{
#ifdef CONFIG_BLK_DEV_IDEPCI
	ide_scan_pcibus(ide_scan_direction);	/* walk PCI for IDE controllers */
#endif /* CONFIG_BLK_DEV_IDEPCI */
......
#ifdef CONFIG_BLK_DEV_IDE_PMAC
	{
		extern void pmac_ide_probe(void);
		pmac_ide_probe();		/* PowerMac-specific probe */
	}
#endif /* CONFIG_BLK_DEV_IDE_PMAC */
......
}
/*
 * Try to bind one PCI device to a driver on the ide_pci_drivers list:
 * for each driver with an id_table, look for a matching PCI ID and, on
 * a successful probe, record the driver on the device.
 * Returns 1 when a driver claimed the device, 0 otherwise.
 */
static int __init ide_scan_pcidev(struct pci_dev *dev)
{
	struct list_head *pos;

	list_for_each(pos, &ide_pci_drivers)
	{
		struct pci_driver *drv = list_entry(pos, struct pci_driver, node);
		const struct pci_device_id *id;

		if (!drv->id_table)
			continue;

		id = pci_match_id(drv->id_table, dev);
		if (id == NULL)
			continue;

		if (drv->probe(dev, id) >= 0)
		{
			dev->driver = drv;
			return 1;
		}
	}
	return 0;
}
pmac_ide_probe():
/*
 * PowerMac IDE entry point: on PowerMac hardware, register both the
 * macio and PCI flavours of the pmac IDE host driver; device probing
 * then proceeds through the driver core.
 *
 * Fix vs. the quoted excerpt: the return type was dropped during
 * transcription — the function returns void.
 */
void pmac_ide_probe(void)
{
	if (!machine_is(powermac))
		return;

	macio_register_driver(&pmac_ide_macio_driver);
	pci_register_driver(&pmac_ide_pci_driver);
}
看一下熟悉的结构:
/*
 * PCI driver for Kauai-style PowerMac IDE controllers.  The PCI core
 * matches id_table entries against discovered devices and calls
 * .probe (pmac_ide_pci_attach) for each match.
 */
static struct pci_driver pmac_ide_pci_driver = {
	.name		= "ide-pmac",
	.id_table	= pmac_ide_pci_match,	/* which PCI IDs we handle */
	.probe		= pmac_ide_pci_attach,	/* bind one matched device */
	.suspend	= pmac_ide_pci_suspend,
	.resume		= pmac_ide_pci_resume,
};
然后:
/*
 * PCI probe for a PowerMac (Kauai) IDE controller: find a free hwif
 * slot, enable and map the PCI device, fill in the pmac_ide_hwif_t
 * bookkeeping and hand everything to pmac_ide_setup_device.
 * Error paths in this excerpt are elided ("…").
 * Returns 0 on success, a negative error from the elided paths / setup
 * otherwise.
 */
static int __devinit
pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id)
{
	ide_hwif_t *hwif;
	struct device_node *np;
	pmac_ide_hwif_t *pmif;
	void __iomem *base;
	unsigned long rbase, rlen;
	int i, rc;

	np = pci_device_to_OF_node(pdev);
	if (np == NULL) {
		…
	}
	/* find the first hwif slot that is neither in use nor claimed */
	i = 0;
	while (i < MAX_HWIFS && (ide_hwifs[i].io_ports[IDE_DATA_OFFSET] != 0
	    || pmac_ide[i].node != NULL))
		++i;
	if (i >= MAX_HWIFS) {
		…
	}
	pmif = &pmac_ide[i];
	hwif = &ide_hwifs[i];

	if (pci_enable_device(pdev)) {
		…
	}
	pci_set_master(pdev);
	if (pci_request_regions(pdev, "Kauai ATA")) {
		…
	}
	hwif->pci_dev = pdev;
	hwif->gendev.parent = &pdev->dev;
	pmif->mdev = NULL;
	pmif->node = np;

	/* map BAR 0; register block starts 0x2000 into the mapping */
	rbase = pci_resource_start(pdev, 0);
	rlen = pci_resource_len(pdev, 0);
	base = ioremap(rbase, rlen);
	pmif->regbase = (unsigned long) base + 0x2000;
	pmif->kauai_fcr = base;
	pmif->irq = pdev->irq;

	pci_set_drvdata(pdev, hwif);

	rc = pmac_ide_setup_device(pmif, hwif);
	if (rc != 0) {
		/* The interface is released to the common IDE layer */
		pci_set_drvdata(pdev, NULL);
		iounmap(base);
		memset(pmif, 0, sizeof(*pmif));
		pci_release_regions(pdev);
	}

	return rc;
}
这里和pci联系了起来