etherCAT igh-master接收超时问题

此博客为一次记录

现象:
        在应用层调用receive时,偶发有接收时间超过1ms的问题(工作频率是1000HZ,所以接收时间一般在100us以内才正常),甚至偶有4,5ms的时候。

环境

        rtlinux+igh1.5.2

排查:

        1 首先在驱动和lib库里增加计时操作,确定超时确实是由etherCAT导致。

        2 细化计时范围,缩小至由于锁导致的receive的ioctl返回时间慢。

        3 改写igh驱动,将锁由原来的inline改写为带计时功能的宏,并且将上次加锁的代码行号记录下来。

原本的锁 master/lock.h

static inline void ec_lock_init(ec_lock_t *sem) { sema_init(sem, 1); }

static inline void ec_lock_down(ec_lock_t *sem) { down(sem); }

static inline int ec_lock_down_interruptible(ec_lock_t *sem) { return down_interruptible(sem); }

static inline void ec_lock_up(ec_lock_t *sem) { up(sem);}

改写为

extern ktime_t start_time_ns_test;

extern ktime_t end_time_ns_test;

extern s64 elapsed_time_ns_test;

extern ktime_t start_time_ns_test2;

extern ktime_t end_time_ns_test2;

extern s64 elapsed_time_ns_test2;

extern volatile ktime_t start_time_ns_test3;

extern volatile ktime_t end_time_ns_test3;

extern volatile ktime_t lock_time[1024];

extern s64 elapsed_time_ns_test3;

extern int temporaryRet;

extern volatile int lock_array[1024];

extern volatile int master_fsm;

extern volatile int master_fsm_reason;

extern volatile s64 schedule_elapsed_time;

#define OVERTIME 300000

//#define ec_lock_init(sem) do { start_time_ns_test=ktime_get(); sema_init((sem), 1); end_time_ns_test=ktime_get();if((elapsed_time_ns_test=ktime_to_ns(ktime_sub(end_time_ns_test, start_time_ns_test)))>OVERTIME){printk("xiuwen%s.c: %d %s too long cost %lld\n",__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test);}} while (0)

#define ec_lock_init(sem) do{ sema_init(sem, 1); printk("xiuwen%s.c: %d %s idx %d\n",__FILE__,__LINE__,__FUNCTION__,(uintptr_t)sem%1024);} while(0)

#define ec_lock_down(sem) do \

{\

start_time_ns_test2=ktime_get(); down(sem);end_time_ns_test2=ktime_get();\

if((elapsed_time_ns_test2=ktime_to_ns(ktime_sub(end_time_ns_test2, start_time_ns_test2)))>OVERTIME)\

{\

printk("xiuwen%s: %d %s lock down too long cost %lld lastLine %d idx %ld pid %d tid %d master %d reason %dn",\

__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test2,lock_array[(uintptr_t)sem%1024],(uintptr_t)sem%1024,current->pid,current->tgid,master_fsm,master_fsm_reason);\

}\

lock_array[(uintptr_t)sem%1024]=__LINE__;\

lock_time[(uintptr_t)sem%1024]=end_time_ns_test2;\

} while (0)

#define ec_lock_down_interruptible(sem) ((((start_time_ns_test=ktime_get())&&(0==(temporaryRet=down_interruptible((sem)))))&&(end_time_ns_test=ktime_get()))&&\

((elapsed_time_ns_test=ktime_to_ns(ktime_sub(end_time_ns_test, start_time_ns_test)))>OVERTIME?\

(NULL!=(1+(printk("xiuwen%s: %d %s lock_down_interrupt too long %lld lastLine %d idx %ld pid %d tid %d master %d reason %d\n",\

__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test,lock_array[(uintptr_t)sem%1024],(uintptr_t)sem%1024,current->pid,current->tgid,master_fsm,master_fsm_reason)\

))):1),(lock_array[(uintptr_t)sem%1024]=__LINE__),(lock_time[(uintptr_t)sem%1024]=end_time_ns_test),temporaryRet)

#define ec_lock_up(sem) do \

{ start_time_ns_test3=ktime_get(); \

up(sem);\

end_time_ns_test3=ktime_get();\

if((elapsed_time_ns_test3=ktime_to_ns(ktime_sub(end_time_ns_test3, start_time_ns_test3)))>OVERTIME)\

{\

printk("xiuwen%s: %d %s lock up too long cost %lld lastLine %d idx %ld pid %d tid %d master %d reason %d\n",\

__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test3,lock_array[(uintptr_t)sem%1024],(uintptr_t)sem%1024,current->pid,current->tgid ,master_fsm,master_fsm_reason);\

}\

if((elapsed_time_ns_test3=ktime_to_ns(ktime_sub(end_time_ns_test3, lock_time[(uintptr_t)sem%1024])))>OVERTIME)\

{\

printk("xiuwen%s: %d %s down lock spend to long time %lld,",__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test3);\

}\

lock_array[(uintptr_t)sem%1024]=__LINE__;\

} while (0)

4 运行测试发现,锁被etherCAT驱动的operation_pthread线程锁持有时间过长

master/master.c

5 读代码发现锁是因为执行slave状态机偶尔会出现过慢的情况,导致锁未被释放,进而导致上层接收超时,想追状态机函数,由于每次超时的子状态机不是同一个,且比较复杂就放弃了,想着从其他方向入手。

6 观察到应用层的调度策略是fifo,优先级是最高,而master的op线程却是普通调度策略,感觉像是状态机执行的次数少,然后出了某些意外,在下次执行状态机时去处理这个意外才导致此次时间过长。所以把OP线程同样改为FIFO,且优先级最高。

master/master.c : ec_master_operation_thread

static int ec_master_operation_thread(void *priv_data)

{

ec_master_t *master = (ec_master_t *) priv_data;

// struct cpumask mask;

struct sched_param param;

param.sched_priority = 99; // 设置优先级为99

if (sched_setscheduler(current, SCHED_FIFO, &param))

{

pr_err("Failed to set scheduler\n");

return -1;

}

EC_MASTER_DBG(master, 1, "Operation thread running"

" with fsm interval = %u us, max data size=%zu\n",

master->send_interval, master->max_queue_size);

while (!kthread_should_stop()) {

ec_datagram_output_stats(&master->fsm_datagram);

if (master->injection_seq_rt == master->injection_seq_fsm) {

// output statistics

ec_master_output_stats(master);

// execute master & slave state machines

if (ec_lock_down_interruptible(&master->master_sem)) {

break;

}

if (ec_fsm_master_exec(&master->fsm)) {

// Inject datagrams (let the RT thread queue them, see

// ecrt_master_send())

master->injection_seq_fsm++;

}

// if rt_slave_requests is true and the slaves are available

// this will be handled by the app explicitly calling

// ecrt_master_exec_slave_request()

if (!master->rt_slave_requests || !master->rt_slaves_available) {

ec_master_exec_slave_fsms(master);

}

ec_lock_up(&master->master_sem);

}

#ifdef EC_USE_HRTIMER

// the op thread should not work faster than the sending RT thread

ec_master_nanosleep(master->send_interval * 1000);

#else

if (ec_fsm_master_idle(&master->fsm)) {

set_current_state(TASK_INTERRUPTIBLE);

schedule_timeout(1);

}

else {

schedule();

}

#endif

}

EC_MASTER_DBG(master, 1, "Master OP thread exiting...\n");

return 0;

}

7 改完之后再次运行,未再出现过超时问题,使用stress-ng进行mem和cpu加压满载测试同样正常运行超过24H。

结论:

etherCAT协议还是比较复杂,igh的状态机不好排查,所以目前只排查到是进程调度策略导致的master的op线程被调度少或者抢占了才导致的(但是igh的OP线程是自旋锁,按理来说不应该会被抢占才对)。如果后续继续排查,有两个方向
1 OP线程循环执行时关中断,等此次循环结束再开中断来验证是否是被抢占导致的。
2 追slave状态机的代码

PS :

如果有大佬知道具体原因,麻烦留言说下哈。

1.基于Igh-master1.5.2的多种Ethercat IO 模块及伺服电机的操作例子 2.基于Ethercat-Ighmaster二次开发库简化你的开发流程 所有源码及文档托管在码云平台,附件中有代码仓库地址 3.含有IghMaster1.5.2最新版本代码,编译脚本及安装到 最新的代码托管在: https://gitee.com/wllw7176/MyEthercat-IGH-1.5.2.git EtherCAT 1.5.2 编译及使用说明 1.交叉编译源码 源码中有内核模块编译,指令定模块目录 根据实际情况修改build_ethercat-1.5.2.sh中以下几个变量: output_dir='output' #编译输出目录 module_install_dir='module_install' #内核模块安装目录 kernel_source_dir='/mnt/fs_ext/imx6/linux-3.0.35' #内核源码目录 lib_modules_kernel_promt='3.0.35-2666-gbdde708' #内核版本号 host=arm-fsl-linux-gnueabi #交叉编译链前缀 注意要先编译内核,然后编译Ethrecat因为Ethercat依赖于内核通用网卡模块 以root用户运行./build_ethercat-1.5.2.sh #编译具体根据提示选择y/n就可以了 ---------------------------------------------------------------------------------------------------------------------------------------------- 手动安装Ethercat到ARM开发板: 2.内核模块安装 2.1 将output文件下通lib_modules_kernel_promt='3.0.35-2666-gbdde708'(build_ethercat-1.5.2.sh 中定义)放入开发板/lib/modules下 2.2 开发板中执行"depmod"命令 3. output里面的其它文件放入开发板对应位置 4. 设置参数 修改/etc/sysconfig/ethercat MASTER0_DEVICE="e4:f3:f5:c6:41:b6" #与ethercat绑定的Mac地址 DEVICE_MODULES="generic" #通用网卡就填generic,其余支持网卡换成模块名字就行 5.添加udev规则 echo KERNEL==\"EtherCAT[0-9]*\", MODE=\"0664\" > /etc/udev/rules.d/99-EtherCAT.rules 6. 启动服务 /etc/init.d/ethercat restart 出现以下信息表明移植成功 Shutting down EtherCAT master 1.5.2 done Starting EtherCAT master 1.5.2 ec_generic: Binding socket to interface 3 (eth0). done 7. 应用层测试 root@linaro-ubuntu-desktop:~# ethercat Please specify a command! Usage: ethercat [OPTIONS] [ARGUMENTS] Commands (can be abbreviated): alias Write alias addresses. config Show slave configurations. cstruct Generate slave PDO information in C language. data Output binary domain process data. debug Set the master's debug level. domains Show configured domains. download Write an SDO entry to a slave. eoe Display Ethernet over EtherCAT statictics. foe_read Read a file from a slave via FoE. foe_write Store a file on a slave via FoE. graph Output the bus topology as a graph. master Show master and Ethernet device information. pdos List Sync managers, PDO assignment and mapping. reg_read Output a slave's register contents. reg_write Write data to a slave's registers. rescan Rescan the bus. sdos List SDO dictionaries. sii_read Output a slave's SII contents. sii_write Write SII contents to a slave. slaves Display slaves on the bus. soe_read Read an SoE IDN from a slave. soe_write Write an SoE IDN to a slave. states Request application-layer states. upload Read an SDO entry from a slave. version Show version information. xml Generate slave information XML. Global options: --master -m Comma separated list of masters to select, ranges are allowed. Examples: '1,3', '5-7,9', '-3'. Default: '-' (all). --force -f Force a command. --quiet -q Output less information. --verbose -v Output more information. --help -h Show this help. Numerical values can be specified either with decimal (no prefix), octal (prefix '0') or hexadecimal (prefix '0x') base. Call 'ethercat --help' for command-specific help. 以上内容为基本的使用,进一步使用要结合ethercat说明文档电机说明。 ---------------------------------------------------------------------------------------------------------------------------------------------- 自动安装Ethercat到ARM开发板: 将output目录复制到开发板然后运行output目录下install_to_arm.sh脚本 执行6,7步骤测试自动安装是否成功 ---------------------------------------------------------------------------------------------------------------------------------------------- 问题汇总: (1)Starting EtherCAT master 1.5.2 EtherCAT ERROR: MAC address may not be empty. FATAL: Error inserting ec_master (/lib/modules/3.0.35-2666-gbdde708-gbdbf2583/mnt/hgfs/win_linux_share/linux/download/ethercat-1.5.2/module_install/master/ec_master.ko): Invalid argument failed Mac地址为空,将对应的mac地址MASTER0_DEVICE="e4:f3:f5:c6:41:b6" (2)编译Ethercat 一定要保证所用编译器与内核与硬件平台一致,否则会出现内核模块不能使用或者其它未知问题 (3)内核源码要先编译通过,否则Ethercat模块将不能编译通过,因为Ethercat模块依赖于内核本身驱动模块。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值