前面章节描述了phy设备的探测过程,虽然没有把代码贴出来,但是两个接口的差异已经描述出来了,相信感兴趣的读者一定会去细细查看of_mdiobus_register和mdiobus_register这两个函数的代码。其实关于phy探测的所有细节都在这两个函数中,希望读者能够仔细去看一下。下面我们开始新的内容,即phy设备如何被mac attach?phy探测过程是mac感知到了phy,并且创建和注册了phy设备。根据Linux设备驱动模型可知,一旦注册phy设备,如果内核中存在对应的phy驱动,必然会触发mdio_bus_type总线的匹配操作,从而调用phydrv->probe()函数。但是这还不够,如果细看phydrv的定义,会发现其内部还定义了很多其他的接口,这些接口涉及到phy自协商、phy状态等信息,这些接口又是如何被触发的呢?
1、mac attach phy
下面代码片段摘自fec_enet_mii_probe,fec_enet_mii_probe在fec_enet_open中被调用,用于探测mac对应的phy是否连接。如果无phy设备则整个emac初始化也会失败。
// Excerpt from fec_enet_mii_probe(): connect the MAC's net_device to its PHY.
// NOTE(review): fep->phy_node is expected to be the device-tree node resolved
// from a "phy-handle" or "fixed-link" property; it is set up outside this excerpt.
if (fep->phy_node) {
// Device-tree path. of_phy_connect() looks up the phy_device that belongs to
// fep->phy_node on mdio_bus_type and hands it to phy_connect_direct(), which
// attaches the PHY (falling back to the generic PHY driver when none is bound),
// starts the PHY state machine and, when phydev->irq > 0, starts PHY interrupts.
// fec_enet_adjust_link is registered as the link-change callback.
phy_dev = of_phy_connect(ndev, fep->phy_node, &fec_enet_adjust_link, 0,fep->phy_interface);
if (!phy_dev) {
netdev_err(ndev, "Unable to connect to phy\n");
return -ENODEV;
}
} else { // No PHY node: scan the MDIO bus addresses and pick a registered device
/* check for attached phy */
for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) {
if (!mdiobus_is_registered_device(fep->mii_bus, phy_id)) // presumably tests bus->mdio_map[addr]
continue; // nothing registered at this address, try the next one
if (dev_id--) // skip registered devices until dev_id has counted down to zero (expected to be 0 already)
continue;
strlcpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE);
break;
}
if (phy_id >= PHY_MAX_ADDR) {
netdev_info(ndev, "no PHY, assuming direct connection to switch\n");
strlcpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE);
phy_id = 0;
}
snprintf(phy_name, sizeof(phy_name),PHY_ID_FMT, mdio_bus_id, phy_id);
// Name-based path: connect to the PHY identified by phy_name (bus id + address).
// phy_connect() itself is not shown in this excerpt; per the surrounding text it
// also ends up in phy_connect_direct(), so the attach/state-machine/interrupt
// steps described for of_phy_connect() above should apply here too.
phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link,fep->phy_interface);
// NOTE(review): the return value of phy_connect() is not checked inside this
// excerpt; on success the PHY is expected to be attached at this point.
}
这里又出现了两个类似的函数of_phy_connect和phy_connect。of_phy_connect和phy_connect最终都会调用phy_connect_direct,区别在于of_phy_connect首先查找设备树节点对应的phy device,之后再调用phy_connect_direct。
下面是一坨详细注释过的代码:
/*
 * of_phy_connect - connect a net_device to the PHY described by a DT node
 * @dev:    network device to connect
 * @phy_np: device-tree node of the PHY
 * @hndlr:  link-change callback handed on to phy_connect_direct()
 * @flags:  stored in the PHY's dev_flags before connecting
 * @iface:  PHY interface mode handed on to phy_connect_direct()
 *
 * Returns the connected phy_device, or NULL when no PHY device matches
 * @phy_np or when phy_connect_direct() reports an error.
 */
struct phy_device *of_phy_connect(struct net_device *dev,
				  struct device_node *phy_np,
				  void (*hndlr)(struct net_device *), u32 flags,
				  phy_interface_t iface)
{
	struct phy_device *phydev;
	int err;

	/* Resolve the DT node to a registered PHY device on the MDIO bus. */
	phydev = of_phy_find_device(phy_np);
	if (phydev == NULL)
		return NULL;

	phydev->dev_flags = flags;

	/* Attach the PHY and start its state machine / interrupts. */
	err = phy_connect_direct(dev, phydev, hndlr, iface);

	/*
	 * Drop the reference in either case: on success
	 * phy_connect_direct() holds its own refcount.
	 */
	put_device(&phydev->mdio.dev);

	if (err != 0)
		return NULL;

	return phydev;
}
struct phy_device *of_phy_find_device(struct device_node *phy_np)
{
struct device *d;
struct mdio_device *mdiodev;
if (!phy_np)
return NULL;
//遍历总线上设备列表,通过of_phy_match判断总线上设备节点和参数phy_np是否一致,一致则返回对应的设备结构
d = bus_find_device(&mdio_bus_type, NULL, phy_np, of_phy_match);
if (d) {
mdiodev = to_mdio_device(d);
if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) //判断其是否为phy设备
return to_phy_device(d);
put_device(d);
}
return NULL;
}
/**
 * phy_connect_direct - connect an ethernet device to a specific phy_device
 * @dev: the network device to connect
 * @phydev: the pointer to the phy device
 * @handler: callback function for state change notifications
 * @interface: PHY device's interface
 *
 * Attaches @phydev to @dev through phy_attach_direct() using the flags
 * already stored in phydev->dev_flags, registers @handler through
 * phy_prepare_link(), starts the PHY state machine and, when
 * phydev->irq is greater than zero, starts PHY interrupt handling.
 *
 * Returns 0 on success or the error reported by phy_attach_direct().
 */
int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
		       void (*handler)(struct net_device *),
		       phy_interface_t interface)
{
	int err = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);

	if (err != 0)
		return err;

	/* Hook up the MAC driver's link-change callback (here: fec_enet_adjust_link). */
	phy_prepare_link(phydev, handler);

	/* Kick off the PHY state machine. */
	phy_start_machine(phydev);

	/* A positive irq number means the PHY has a real interrupt line. */
	if (phydev->irq > 0)
		phy_start_interrupts(phydev);

	return 0;
}
/**
 * phy_attach_direct - attach a network device to a given PHY device
 * @dev: network device to attach
 * @phydev: PHY device to attach to
 * @flags: value stored into phydev->dev_flags
 * @interface: value stored into phydev->interface
 *
 * Takes a module reference on the MDIO bus owner (unless it is the same
 * module as the net device's parent driver) and a device reference on the
 * PHY, binds the generic PHY driver when no driver is bound yet, links
 * @phydev and @dev to each other, creates the sysfs links, moves the PHY to
 * PHY_READY and finally runs phy_init_hw() and phy_resume().
 *
 * Returns 0 on success, -EIO when a module reference cannot be taken,
 * -EBUSY when the PHY is already attached, or the error reported by the
 * generic driver probe/bind or by phy_init_hw().
 */
int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,u32 flags, phy_interface_t interface)
{
struct module *ndev_owner = dev->dev.parent->driver->owner;
struct mii_bus *bus = phydev->mdio.bus;
struct device *d = &phydev->mdio.dev;
bool using_genphy = false;
int err;
/* For Ethernet device drivers that register their own MDIO bus, we
* will have bus->owner match ndev_mod, so we do not want to increment
* our own module->refcnt here, otherwise we would not be able to
* unload later on.
*/
if (ndev_owner != bus->owner && !try_module_get(bus->owner)) {
dev_err(&dev->dev, "failed to get the bus module\n");
return -EIO;
}
get_device(d);
/* Assume that if there is no driver, that it doesn't
* exist, and we should use the genphy driver.
*/
if (!d->driver) { // no driver bound: fall back to a generic PHY driver (article note: the phy_device was created, registered and matched against PHY drivers earlier, while the MAC driver was loading)
if (phydev->is_c45)
d->driver = &genphy_10g_driver.mdiodrv.driver;
else
d->driver = &genphy_driver.mdiodrv.driver;
using_genphy = true; // remember that the generic driver must be probed and bound below
}
if (!try_module_get(d->driver->owner)) {
dev_err(&dev->dev, "failed to get the device driver module\n");
err = -EIO;
goto error_put_device;
}
if (using_genphy) {
err = d->driver->probe(d);
if (err >= 0)
err = device_bind_driver(d);
if (err)
goto error_module_put;
}
// phydev->attached_dev is the net_device this PHY is attached to; a non-NULL value means the PHY is already in use
if (phydev->attached_dev) {
dev_err(&dev->dev, "PHY already attached\n");
err = -EBUSY;
goto error;
}
phydev->phy_link_change = phy_link_change;
phydev->attached_dev = dev; // cross-link PHY and MAC: the PHY is now attached
dev->phydev = phydev;
/* Some Ethernet drivers try to connect to a PHY device before
* calling register_netdevice() -> netdev_register_kobject() and
* does the dev->dev.kobj initialization. Here we only check for
* success which indicates that the network device kobject is
* ready. Once we do that we still need to keep track of whether
* links were successfully set up or not for phy_detach() to
* remove them accordingly.
*/
phydev->sysfs_links = false;
err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
"attached_dev");
if (!err) {
err = sysfs_create_link_nowarn(&dev->dev.kobj,
&phydev->mdio.dev.kobj,
"phydev");
if (err) {
dev_err(&dev->dev, "could not add device link to %s err %d\n",
kobject_name(&phydev->mdio.dev.kobj),
err);
/* non-fatal - some net drivers can use one netdevice
* with more then one phy
*/
}
phydev->sysfs_links = true;
}
phydev->dev_flags = flags;
phydev->interface = interface;
phydev->state = PHY_READY; // attached and configured; phy_start() later moves PHY_READY to PHY_UP
/* Initial carrier state is off as the phy is about to be
* (re)initialized.
*/
netif_carrier_off(phydev->attached_dev);
/* Do initial configuration here, now that
* we have certain key parameters
* (dev_flags and interface)
*/
err = phy_init_hw(phydev); // article note: invokes the PHY driver's soft_reset and config_init hooks
if (err)
goto error;
phy_resume(phydev); // article note: invokes the PHY driver's resume hook
phy_led_triggers_register(phydev);
return err;
error:
/* phy_detach() does all of the cleanup below */
phy_detach(phydev);
return err;
error_module_put:
module_put(d->driver->owner);
error_put_device:
put_device(d);
if (ndev_owner != bus->owner)
module_put(bus->owner);
return err;
}
phy_attach_direct中会检查phydev是否已经匹配了phydrv,如果没有则使用通用phydrv。这就是为什么我们并没有编写工业以太网phy驱动,但是phy还是能够正常工作的原因。
2、phy状态机监控phy状态的改变
phy_start接口在fec_enet_open中fec_enet_mii_probe之后被调用,用来触发phy状态机的监控。
下面把phy状态机相关的两个接口注释了并贴出:
/*
 * phy_start - advance the PHY state and trigger the state machine
 * @phydev: PHY device to start
 *
 * Under phydev->lock the state is advanced as follows:
 *   PHY_STARTING -> PHY_PENDING
 *   PHY_READY    -> PHY_UP
 *   PHY_HALTED   -> PHY_RESUMING (after __phy_resume(); skipped when
 *                   re-enabling the PHY interrupts fails)
 * Any other state is left untouched. Afterwards the state machine is
 * triggered through phy_trigger_machine().
 */
void phy_start(struct phy_device *phydev)
{
	int err = 0;

	mutex_lock(&phydev->lock);

	switch (phydev->state) {
	case PHY_STARTING:
		phydev->state = PHY_PENDING;
		break;

	case PHY_READY:
		/* PHY_READY is the state phy_attach_direct() leaves the PHY in. */
		phydev->state = PHY_UP;
		break;

	case PHY_HALTED:
		/* if phy was suspended, bring the physical link up again */
		__phy_resume(phydev);

		/* make sure interrupts are re-enabled for the PHY */
		if (phy_interrupt_is_valid(phydev))
			err = phy_enable_interrupts(phydev);

		/* Stay in PHY_HALTED when the interrupts could not be enabled. */
		if (err >= 0)
			phydev->state = PHY_RESUMING;
		break;

	default:
		break;
	}

	mutex_unlock(&phydev->lock);

	/* Queue the state machine work so phy_state_machine() gets to run. */
	phy_trigger_machine(phydev, true);
}
/**
 * phy_state_machine - Handle the state machine
 * @work: work_struct that describes the work to be done
 *
 * Runs one step of the PHY state machine for the phy_device that embeds
 * @work as its state_queue. The state transition is evaluated under
 * phydev->lock; autonegotiation restart, suspend and error handling happen
 * after the lock is released. When the PHY is polled (phydev->irq ==
 * PHY_POLL) the work re-queues itself after PHY_STATE_TIME * HZ jiffies.
 */
void phy_state_machine(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct phy_device *phydev =container_of(dwork, struct phy_device, state_queue);
bool needs_aneg = false, do_suspend = false;
enum phy_state old_state;
int err = 0;
int old_link;
mutex_lock(&phydev->lock);
old_state = phydev->state; // remember the state on entry for the debug message at the end
if (phydev->drv && phydev->drv->link_change_notify)
phydev->drv->link_change_notify(phydev); // optional PHY driver hook, called on every pass when present
switch (phydev->state) { // dispatch on the current PHY state
case PHY_DOWN: // idle states: nothing to do in this pass
case PHY_STARTING:
case PHY_READY: // state set by phy_attach_direct() until phy_start() runs
case PHY_PENDING:
break;
case PHY_UP: // reached from PHY_READY via phy_start(): kick off autonegotiation
needs_aneg = true; // autonegotiation is started after the lock is dropped
phydev->link_timeout = PHY_AN_TIMEOUT; // article note: PHY_AN_TIMEOUT is 10
break;
case PHY_AN: // article note: phy_start_aneg_priv() moves PHY_UP to PHY_AN
err = phy_read_status(phydev); // article note: calls the driver's read_status hook, which updates phydev->link, phydev->duplex and phydev->speed
if (err < 0)
break;
/* If the link is down, give up on negotiation for now */
if (!phydev->link) { // link flag as refreshed by phy_read_status() above
phydev->state = PHY_NOLINK; // no link reported: wait in PHY_NOLINK
phy_link_down(phydev, true);
break; // done for this pass
}
/* Check if negotiation is done. Break if there's an error */
err = phy_aneg_done(phydev); // link is up: ask whether autonegotiation has finished
if (err < 0)
break;
/* If AN is done, we're running */
if (err > 0) { /* a positive return value means autonegotiation completed */
phydev->state = PHY_RUNNING; // negotiation finished: link is operational
phy_link_up(phydev);
} else if (0 == phydev->link_timeout--) /* AN not finished (err == 0): count the timeout down */
needs_aneg = true; // timeout expired: restart autonegotiation
break;
case PHY_NOLINK:
if (phy_interrupt_is_valid(phydev))
break;
err = phy_read_status(phydev); // polled PHY: refresh the link status
if (err)
break;
if (phydev->link) { /* link came up */
if (AUTONEG_ENABLE == phydev->autoneg) {
err = phy_aneg_done(phydev); // link is up: check the autonegotiation result
if (err < 0) // negative return value is an error
break;
if (!err) { // AN still in progress: go back to PHY_AN with a fresh timeout
phydev->state = PHY_AN;
phydev->link_timeout = PHY_AN_TIMEOUT;
break;
}
}
phydev->state = PHY_RUNNING; // AN finished (or autoneg disabled): link is operational
phy_link_up(phydev);
}
break;
case PHY_FORCING:
err = genphy_update_link(phydev);
if (err)
break;
if (phydev->link) {
phydev->state = PHY_RUNNING;
phy_link_up(phydev);
} else {
if (0 == phydev->link_timeout--)
needs_aneg = true;
phy_link_down(phydev, false);
}
break;
case PHY_RUNNING: // link is up and negotiation has completed
/* Only register a CHANGE if we are polling and link changed
* since latest checking.
*/
if (phydev->irq == PHY_POLL) {
old_link = phydev->link;
err = phy_read_status(phydev); // polled PHY: status is re-read on every pass even while running
if (err)
break;
if (old_link != phydev->link)
phydev->state = PHY_CHANGELINK;
}
/*
* Failsafe: check that nobody set phydev->link=0 between two
* poll cycles, otherwise we won't leave RUNNING state as long
* as link remains down.
*/
if (!phydev->link && phydev->state == PHY_RUNNING) {
phydev->state = PHY_CHANGELINK; // link went away without a detected change
phydev_err(phydev, "no link in PHY_RUNNING\n");
}
break;
case PHY_CHANGELINK:
err = phy_read_status(phydev); // refresh the link status
if (err)
break;
if (phydev->link) {
phydev->state = PHY_RUNNING;
phy_link_up(phydev);
} else {
phydev->state = PHY_NOLINK;
phy_link_down(phydev, true);
}
if (phy_interrupt_is_valid(phydev))
err = phy_config_interrupt(phydev,
PHY_INTERRUPT_ENABLED);
break;
case PHY_HALTED:
if (phydev->link) {
phydev->link = 0;
phy_link_down(phydev, true);
do_suspend = true; // suspend the PHY once the lock is released
}
break;
case PHY_RESUMING:
if (AUTONEG_ENABLE == phydev->autoneg) {
err = phy_aneg_done(phydev); // check the autonegotiation result
if (err < 0)
break;
/* err > 0 if AN is done.
* Otherwise, it's 0, and we're still waiting for AN
*/
if (err > 0) { // autonegotiation completed
err = phy_read_status(phydev);
if (err)
break;
if (phydev->link) {
phydev->state = PHY_RUNNING;
phy_link_up(phydev);
} else {
phydev->state = PHY_NOLINK;
phy_link_down(phydev, false);
}
} else { // AN still in progress: keep waiting in PHY_AN with a fresh timeout
phydev->state = PHY_AN;
phydev->link_timeout = PHY_AN_TIMEOUT;
}
} else {
err = phy_read_status(phydev);
if (err)
break;
if (phydev->link) {
phydev->state = PHY_RUNNING;
phy_link_up(phydev);
} else {
phydev->state = PHY_NOLINK;
phy_link_down(phydev, false);
}
}
break;
}
mutex_unlock(&phydev->lock);
if (needs_aneg) // (re)start autonegotiation outside the lock
err = phy_start_aneg_priv(phydev, false);
else if (do_suspend)
phy_suspend(phydev); // article note: invokes the PHY driver's suspend hook
if (err < 0)
phy_error(phydev);
if (old_state != phydev->state)
phydev_dbg(phydev, "PHY state change %s -> %s\n",phy_state_to_str(old_state),phy_state_to_str(phydev->state));
/* Only re-schedule a PHY state machine change if we are polling the
* PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
* between states from phy_mac_interrupt()
*/
if (phydev->irq == PHY_POLL) // polling mode: re-queue this work; article note: with PHY interrupts the interrupt path re-triggers the state machine via phy_trigger_machine()
queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,PHY_STATE_TIME * HZ);/* article note: i.e. once per second */
}
上面将phy状态机监控的整个流程梳理了一遍,其中的部分细节需要读者自己去细读函数代码,尤其是各状态之间的跳变过程。其中phy驱动中aneg_done的返回值需要注意:返回值大于0,表示协商完成;返回值等于0,表示当前协商尚未完成,状态机会在后续的轮询中继续等待,link_timeout(初值为PHY_AN_TIMEOUT,即10)递减到0后会重新发起自协商;返回值小于0,表示出错,状态机会调用phy_error()进行处理。
phy的状态机在轮询模式下以1HZ(1s)为周期触发一次。如果phy最终状态为RUNNING,则轮询模式(phydev->irq == PHY_POLL)下每1s调用一次phydrv->read_status获取当前phy的link状态,这就解释了为什么插拔网线时,linux能够感知到phy link down和link up状态的变化,从而触发新一轮phy状态的变化。
其中的很多细节还需要读者去深入阅读源码。下一节将讲解fixed-link相关的内容。