在比较早的时候就发现RT-Thread的CAN驱动存在一些问题。当时并没有去记录,最近又因为这个驱动问题搞得一个头两个大。因为产品已经发到国外了,就算进行升级也是比较麻烦的。
RT-Thread的CAN驱动在CAN没有出错的时候是可以正常工作的,一旦CAN总线出现问题,这个驱动就变得不那么稳定了。我觉得在CAN总线发生故障的状态下,这个驱动会出现问题的点有以下几个:
rt_inline int _can_int_tx(struct rt_can_device *can, const struct rt_can_msg *data, int msgs)
{
int size;
struct rt_can_tx_fifo *tx_fifo;
RT_ASSERT(can != RT_NULL);
size = msgs;
tx_fifo = (struct rt_can_tx_fifo *) can->can_tx;
RT_ASSERT(tx_fifo != RT_NULL);
while (msgs)
{
rt_base_t level;
rt_uint32_t no;
rt_uint32_t result;
struct rt_can_sndbxinx_list *tx_tosnd = RT_NULL;
rt_sem_take(&(tx_fifo->sem), RT_WAITING_FOREVER);
level = rt_hw_interrupt_disable();
tx_tosnd = rt_list_entry(tx_fifo->freelist.next, struct rt_can_sndbxinx_list, list);
RT_ASSERT(tx_tosnd != RT_NULL);
rt_list_remove(&tx_tosnd->list);
rt_hw_interrupt_enable(level);
no = ((rt_uint32_t)tx_tosnd - (rt_uint32_t)tx_fifo->buffer) / sizeof(struct rt_can_sndbxinx_list);
tx_tosnd->result = RT_CAN_SND_RESULT_WAIT;
if (can->ops->sendmsg(can, data, no) != RT_EOK)
{
/* send failed. */
level = rt_hw_interrupt_disable();
rt_list_insert_after(&tx_fifo->freelist, &tx_tosnd->list);
rt_hw_interrupt_enable(level);
rt_sem_release(&(tx_fifo->sem));
continue;
}
can->status.sndchange = 1;
rt_completion_wait(&(tx_tosnd->completion), RT_WAITING_FOREVER);
level = rt_hw_interrupt_disable();
result = tx_tosnd->result;
if (!rt_list_isempty(&tx_tosnd->list))
{
rt_list_remove(&tx_tosnd->list);
}
rt_list_insert_before(&tx_fifo->freelist, &tx_tosnd->list);
rt_hw_interrupt_enable(level);
rt_sem_release(&(tx_fifo->sem));
if (result == RT_CAN_SND_RESULT_OK)
{
level = rt_hw_interrupt_disable();
can->status.sndpkg++;
rt_hw_interrupt_enable(level);
data ++;
msgs -= sizeof(struct rt_can_msg);
if (!msgs) break;
}
else
{
level = rt_hw_interrupt_disable();
can->status.dropedsndpkg++;
rt_hw_interrupt_enable(level);
break;
}
}
return (size - msgs);
}
/**
 * Open the CAN device.
 *
 * Stores the low byte of @p oflag as the device open flags, then, under the
 * CAN lock:
 *  - allocates and links the RX fifo (can->config.msgboxsz entries) when
 *    RT_DEVICE_FLAG_INT_RX is requested and no RX fifo exists yet;
 *  - allocates and links the TX fifo (can->config.sndboxnumber entries) and
 *    its semaphore when RT_DEVICE_FLAG_INT_TX is requested and no TX fifo
 *    exists yet;
 *  - issues RT_DEVICE_CTRL_SET_INT with RT_DEVICE_CAN_INT_ERR unconditionally;
 *  - allocates the hdr table when RT_CAN_USING_HDR is defined;
 *  - starts can->timer the first time the device is opened.
 *
 * Allocation failures are only caught by RT_ASSERT.
 *
 * @param dev    device object, which is cast to struct rt_can_device.
 * @param oflag  open flags.
 *
 * @return always RT_EOK.
 */
static rt_err_t rt_can_open(struct rt_device *dev, rt_uint16_t oflag)
{
    struct rt_can_device *can;
    char tmpname[16];

    RT_ASSERT(dev != RT_NULL);
    can = (struct rt_can_device *)dev;

    CAN_LOCK(can);

    /* get open flags */
    dev->open_flag = oflag & 0xff;

    /* interrupt-driven reception: build the RX fifo once */
    if ((can->can_rx == RT_NULL) && (oflag & RT_DEVICE_FLAG_INT_RX))
    {
        int slot;
        struct rt_can_rx_fifo *rxf;

        /* fifo header and message slots live in one allocation */
        rxf = (struct rt_can_rx_fifo *) rt_malloc(sizeof(*rxf) +
                can->config.msgboxsz * sizeof(struct rt_can_msg_list));
        RT_ASSERT(rxf != RT_NULL);

        rxf->buffer = (struct rt_can_msg_list *)(rxf + 1);
        rt_memset(rxf->buffer, 0, can->config.msgboxsz * sizeof(struct rt_can_msg_list));
        rt_list_init(&rxf->freelist);
        rt_list_init(&rxf->uselist);
        rxf->freenumbers = can->config.msgboxsz;

        /* every slot starts out on the free list */
        for (slot = 0; slot < can->config.msgboxsz; slot++)
        {
            struct rt_can_msg_list *entry = &rxf->buffer[slot];

            rt_list_insert_before(&rxf->freelist, &entry->list);
#ifdef RT_CAN_USING_HDR
            rt_list_init(&entry->hdrlist);
            entry->owner = RT_NULL;
#endif
        }

        can->can_rx = rxf;
        dev->open_flag |= RT_DEVICE_FLAG_INT_RX;
        /* open can rx interrupt */
        can->ops->control(can, RT_DEVICE_CTRL_SET_INT, (void *)RT_DEVICE_FLAG_INT_RX);
    }

    /* interrupt-driven transmission: build the TX fifo once */
    if ((can->can_tx == RT_NULL) && (oflag & RT_DEVICE_FLAG_INT_TX))
    {
        int box;
        struct rt_can_tx_fifo *txf;

        /* fifo header and send-box entries live in one allocation */
        txf = (struct rt_can_tx_fifo *) rt_malloc(sizeof(*txf) +
                can->config.sndboxnumber * sizeof(struct rt_can_sndbxinx_list));
        RT_ASSERT(txf != RT_NULL);

        txf->buffer = (struct rt_can_sndbxinx_list *)(txf + 1);
        rt_memset(txf->buffer, 0,
                  can->config.sndboxnumber * sizeof(struct rt_can_sndbxinx_list));
        rt_list_init(&txf->freelist);

        for (box = 0; box < can->config.sndboxnumber; box++)
        {
            struct rt_can_sndbxinx_list *entry = &txf->buffer[box];

            rt_list_insert_before(&txf->freelist, &entry->list);
            rt_completion_init(&(entry->completion));
            entry->result = RT_CAN_SND_RESULT_OK;
        }

        /* semaphore counts the free send-box entries */
        rt_sprintf(tmpname, "%stl", dev->parent.name);
        rt_sem_init(&(txf->sem), tmpname, can->config.sndboxnumber, RT_IPC_FLAG_FIFO);

        can->can_tx = txf;
        dev->open_flag |= RT_DEVICE_FLAG_INT_TX;
        /* open can tx interrupt */
        can->ops->control(can, RT_DEVICE_CTRL_SET_INT, (void *)RT_DEVICE_FLAG_INT_TX);
    }

    /* requested on every open, independent of the open flags */
    can->ops->control(can, RT_DEVICE_CTRL_SET_INT, (void *)RT_DEVICE_CAN_INT_ERR);

#ifdef RT_CAN_USING_HDR
    if (can->hdr == RT_NULL)
    {
        int h;
        struct rt_can_hdr *hdr_table;

        hdr_table = (struct rt_can_hdr *) rt_malloc(can->config.maxhdr * sizeof(struct rt_can_hdr));
        RT_ASSERT(hdr_table != RT_NULL);
        rt_memset(hdr_table, 0, can->config.maxhdr * sizeof(struct rt_can_hdr));

        for (h = 0; h < can->config.maxhdr; h++)
        {
            rt_list_init(&hdr_table[h].list);
        }

        can->hdr = hdr_table;
    }
#endif

    /* start the device timer only once */
    if (!can->timerinitflag)
    {
        can->timerinitflag = 1;
        rt_timer_start(&can->timer);
    }

    CAN_UNLOCK(can);

    return RT_EOK;
}
- RT-Thread的CAN驱动在将数据写入发送邮箱的时候如果出错,会形成死循环。
如果 `if (can->ops->sendmsg(can, data, no) != RT_EOK)` 这里出现问题(写入失败),会 `continue` 回到 `while`,如果一直不成功,就一直循环下去成为死循环。之前我在处理这个问题的时候是进行计数,如果写入失败将做出一定的处理,而不是一直在这里死循环。
- 将数据写入发送邮箱之后,会以 `RT_WAITING_FOREVER`(永久等待)的形式等在完成量上:`rt_completion_wait(&(tx_tosnd->completion), RT_WAITING_FOREVER);`。如果一直发送失败,这个完成量一直得不到释放,调用发送的线程就会一直被挂起,这个线程如果还有其它的事务需要处理,将导致一些问题。
- `open` 函数默认开启了CAN的错误中断,如果CAN一直发送一直出错,程序将一直进入错误中断服务函数。这将消耗大量的运算资源,甚至整个程序都得不到正常的运行。
当然,这些问题都是有办法解决的,只是在不了解这些潜在问题的情况下如果遇到问题将很难找到问题所在。
到目前为止好像官方都没有提交有效解决这些问题的代码,嗯……只能自己改巴改巴了。
RT-Thread社区也有人对一些问题提出解决方法。例如https://club.rt-thread.org/ask/article/3034.html,其实这些问题更难的是如何发现,像出现以上问题程序并不会跑崩,这就需要深入分析出现的异常现象是什么导致的,找到问题,解决办法总是有的。