龙芯平台fio异步无法测试问题

概要

fio是一个非常灵活的io测试工具,它可以通过多线程或多进程模拟各种io操作。

随着块设备的发展,特别是SSD盘的出现,设备的并行度越来越高。要想利用好这些设备,有个诀窍就是提高设备的iodepth, 一次喂给设备更多的IO请求,让电梯算法和设备有机会来安排合并以及内部并行处理,提高总体效率。

应用程序使用IO通常有两种方式:同步和异步。 同步的IO一次只能发出一个IO请求,等待内核完成才返回,这样对于单个线程iodepth不会超过1,但是可以通过多个线程并发执行来解决,通常我们会用16-32个线程同时工作把iodepth塞满。 异步的话就是用类似libaio这样的linux native aio一次提交一批,然后等待一批的完成,减少交互的次数,会更有效率。

调试环境

  • 验证系统
    loongnix1.0版本

  • 验证工具版本
    fio-3.12
    libaio-0.3.112

问题说明

在龙芯平台上,使用fio同步测试是没有问题的,但异步是无法跑起来的,报错是调用io_submit接口时传的参数不正确,需要说明的是,fio异步使用的是libaio库实现的,实际使用的是内核中的接口。

调试步骤

从fio源码中定位

通过报错位置找到了td_verror(td, -ret, "io commit");,说明td->io_ops->commit(td); 出问题了
fio/ioengines.c

/*
 * Push the I/O units queued on this thread to the I/O engine.
 *
 * Returns early when either td->cur_depth or td->io_u_queued is zero.
 * Otherwise the engine's optional ->commit hook is called; a non-zero
 * return is reported through td_verror() with the message "io commit".
 * Regardless of the hook's result, the queued count is then added to
 * td->io_u_in_flight and td->io_u_queued is reset to 0.
 */
void td_io_commit(struct thread_data *td)
{
	int ret;

	dprint(FD_IO, "calling ->commit(), depth %d\n", td->cur_depth);

	if (!td->cur_depth || !td->io_u_queued)
		return;

	io_u_mark_depth(td, td->io_u_queued);

	/* The hook is optional; engines without one skip straight to the accounting. */
	if (td->io_ops->commit) {
		ret = td->io_ops->commit(td);
		if (ret)
			/* ret is negated before being handed to td_verror(). */
			td_verror(td, -ret, "io commit");
	}

	/*
	 * Reflect that events were submitted as async IO requests.
	 */
	td->io_u_in_flight += td->io_u_queued;
	td->io_u_queued = 0;
}

查找函数指针td->io_ops->commit(td);初始化的地方,通过打印判定是static int fio_libaio_commit(struct thread_data *td)

fio/engines/libaio.c

/*
 * ->commit hook of the libaio engine: submit the queued iocbs with
 * io_submit().
 *
 * Each pass submits at most min(ld->queued, ld->entries - ld->tail) iocbs,
 * starting at index ld->tail of ld->iocbs, and repeats while ld->queued is
 * non-zero.
 *
 * Returns 0 when nothing was queued, after a successful submission, or when
 * -EAGAIN / -ENOMEM was hit while IO is still queued; otherwise returns the
 * negative value io_submit() produced.
 */
static int fio_libaio_commit(struct thread_data *td)
{
	struct libaio_data *ld = td->io_ops_data;
	struct iocb **iocbs;
	struct io_u **io_us;
	struct timespec ts;
	int ret, wait_start = 0;

	if (!ld->queued)
		return 0;

	do {
		long nr = ld->queued;

		/* Never submit more than the entries left between tail and ld->entries. */
		nr = min((unsigned int) nr, ld->entries - ld->tail);
		io_us = ld->io_us + ld->tail;
		iocbs = ld->iocbs + ld->tail;

		ret = io_submit(ld->aio_ctx, nr, iocbs);
		if (ret > 0) {
			/* ret iocbs were accepted: account for them and advance tail. */
			fio_libaio_queued(td, io_us, ret);
			io_u_mark_submit(td, ret);

			ld->queued -= ret;
			ring_inc(ld, &ld->tail, ret);
			ret = 0;
			wait_start = 0;
		} else if (ret == -EINTR || !ret) {
			/* Interrupted, or nothing accepted: go back to the loop condition. */
			if (!ret)
				io_u_mark_submit(td, ret);
			wait_start = 0;
			continue;
		} else if (ret == -EAGAIN) {
			/*
			 * If we get EAGAIN, we should break out without
			 * error and let the upper layer reap some
			 * events for us. If we have no queued IO, we
			 * must loop here. If we loop for more than 30s,
			 * just error out, something must be buggy in the
			 * IO path.
			 */
			if (ld->queued) {
				ret = 0;
				break;
			}
			if (!wait_start) {
				fio_gettime(&ts, NULL);
				wait_start = 1;
			} else if (mtime_since_now(&ts) > 30000) {
				log_err("fio: aio appears to be stalled, giving up\n");
				break;
			}
			usleep(1);
			continue;
		} else if (ret == -ENOMEM) {
			/*
			 * If we get -ENOMEM, reap events if we can. If
			 * we cannot, treat it as a fatal event since there's
			 * nothing we can do about it.
			 */
			if (ld->queued)
				ret = 0;
			break;
		} else
			/* Any other negative value is returned to the caller unchanged. */
			break;
	} while (ld->queued);

	return ret;
}

通过打印断定是io_submit出问题了,下面是io_submit的原型,查看发现就是宏定义接口,查看libaio源码中的接口

/*
 * Generates a three-argument wrapper function named `fname`: each argument
 * is cast to long and passed, together with `sname`, to _body_io_syscall().
 */
#define io_syscall3(type,fname,sname,type1,arg1,type2,arg2,type3,arg3) \
type fname(type1 arg1,type2 arg2,type3 arg3) \
_body_io_syscall(sname, (long)arg1, (long)arg2, (long)arg3)

/* Defines: int io_submit(io_context_t ctx, long nr, struct iocb **iocbs) */
io_syscall3(int, io_submit, io_submit, io_context_t, ctx, long, nr, struct iocb **, iocbs)

查看libaio接口发现,libaio实现的只是数据结构,实际的接口调用的是内核的
libaio-0.3.112/src/libaio.h

extern int io_setup(int maxevents, io_context_t *ctxp);
extern int io_destroy(io_context_t ctx);
extern int io_submit(io_context_t ctx, long nr, struct iocb *ios[]);
extern int io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *evt);
extern int io_getevents(io_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout);
extern int io_pgetevents(io_context_t ctx_id, long min_nr, long nr,

查看内核中aio.c如下,发现传到内核的数据不正确了

/*
 * do_io_submit() with debug printk()s added to trace what user space hands
 * to the kernel.
 *
 * Rejects a negative @nr, clamps an oversized one, checks that the user
 * array @iocbpp is readable, and looks up the context for @ctx_id. It then
 * fetches each iocb pointer with __get_user(), copies the iocb with
 * copy_from_user() and passes it to io_submit_one(), stopping at the first
 * failure.
 *
 * Returns the loop index reached (number of iocbs handled before the first
 * failure) when it is non-zero, otherwise the error code in ret.
 *
 * The debug output never dereferences a user pointer directly and never
 * prints a variable before it has been assigned.
 */
long do_io_submit(aio_context_t ctx_id, long nr,
		  struct iocb __user *__user *iocbpp, bool compat)
{
	struct kioctx *ctx;
	long ret = 0;
	int i = 0;
	struct blk_plug plug;

	printk("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!%d\n",__LINE__);
	/*
	 * Log only the address of the user array. iocbpp is an unchecked
	 * __user pointer at this point, so it must not be dereferenced here;
	 * each element is printed in the loop after __get_user() fetched it.
	 */
	printk(KERN_DEBUG "line%d,addr:%p\n",__LINE__,iocbpp);
	printk(KERN_DEBUG "line:%d,func:%s\n",__LINE__,__func__);
	if (unlikely(nr < 0))
		return -EINVAL;
	printk(KERN_DEBUG "line:%d,func:%s\n",__LINE__,__func__);
	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
		nr = LONG_MAX/sizeof(*iocbpp);

	printk(KERN_DEBUG "line:%d,func:%s\n",__LINE__,__func__);
	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
		return -EFAULT;

	printk(KERN_DEBUG "line:%d,func:%s\n",__LINE__,__func__);
	ctx = lookup_ioctx(ctx_id);
	if (unlikely(!ctx)) {
		pr_debug("EINVAL: invalid context id\n");
		return -EINVAL;
	}

	printk(KERN_DEBUG "line:%d,func:%s\n",__LINE__,__func__);
	blk_start_plug(&plug);

	printk(KERN_DEBUG "line:%d,func:%s\n",__LINE__,__func__);
	/*
	 * AKPM: should this return a partial result if some of the IOs were
	 * successfully submitted?
	 */
	for (i=0; i<nr; i++) {
		struct iocb __user *user_iocb;
		struct iocb tmp;

		/* user_iocb is not assigned yet, so only the kernel copy's address is logged. */
		printk(KERN_DEBUG "line%d,addr:%p\n",__LINE__,&tmp);
		if (unlikely(__get_user(user_iocb, iocbpp + i))) {
			ret = -EFAULT;
			break;
		}

		/* User-space address of the i-th iocb, to compare with the application's print. */
		printk(KERN_DEBUG "line%d,addr:%p\n",__LINE__,user_iocb);
		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
			ret = -EFAULT;
			break;
		}
		/* sizeof yields size_t, which needs %zu; this is the kernel-side struct iocb size. */
		printk(KERN_DEBUG "line%d,addr:%p-%zu\n",__LINE__,&tmp,sizeof(tmp));

		printk(KERN_DEBUG "line:%d,func:%s\n",__LINE__,__func__);
		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
		if (ret)
			break;
	}
	blk_finish_plug(&plug);

	put_ioctx(ctx);
	return i ? i : ret;
}

同时打印出内核和应用中的数据进行对比,发现了问题,内核中的数据长度是64byte,而应用中的数据长度是83byte,这样数据肯定是对不上的,所以查看原因,发现是libaio中的问题。

问题解决

libaio源码本身是不支持mips64el的,要支持它需要修改两个地方:syscall.h和libaio.h。

syscall.h中添加__mips64和syscall-mips64el.h
__mips64是gcc中定义的宏

/*
 * Pick the per-architecture header that provides the AIO system call
 * numbers. Architectures without a match fall through to the #warning
 * branch and use the numbers from sys/syscall.h.
 */
#if defined(__i386__)
#include "syscall-i386.h"
#elif defined(__x86_64__)
#include "syscall-x86_64.h"
#elif defined(__mips64)
/*
 * Branch added for Loongson (mips64el).
 * NOTE(review): __mips64 may also be defined for the n32 ABI, whose syscall
 * numbers differ from n64 - confirm that only n64 builds are targeted.
 */
#include "syscall-mips64el.h"
#elif defined(__ia64__)
#include "syscall-ia64.h"
#elif defined(__PPC__)
#include "syscall-ppc.h"
#elif defined(__s390__)
#include "syscall-s390.h"
#elif defined(__alpha__)
#include "syscall-alpha.h"
#elif defined(__arm__)
#include "syscall-arm.h"
#elif defined(__sparc__)
#include "syscall-sparc.h"
#elif defined(__aarch64__) || defined(__riscv)
#include "syscall-generic.h"
#else
#warning "using system call numbers from sys/syscall.h"
#endif

syscall-mips64el.h内容如下,从~~linux/include/asm-arm/unistd.h~~ /usr/include/asm/unistd.h中获取

/*
 * AIO system call numbers for mips64el, expressed as offsets from
 * __NR_Linux.
 * NOTE(review): 5000 looks like the MIPS n64 syscall base - confirm against
 * the target's /usr/include/asm/unistd.h.
 */
#define __NR_Linux                      5000
#define __NR_io_setup                   (__NR_Linux + 200)
#define __NR_io_destroy                 (__NR_Linux + 201)
#define __NR_io_getevents               (__NR_Linux + 202)
#define __NR_io_submit                  (__NR_Linux + 203)
#define __NR_io_cancel                  (__NR_Linux + 204)

libaio.h 添加内容如下,在64 bits处添加__mips64

/*
 * PADDED / PADDEDptr / PADDEDul declare a struct member, optionally followed
 * by an extra `unsigned` padding member. Which expansion is chosen depends on
 * the target's endianness and word size, so it changes the struct layout.
 */

/* little endian, 32 bits */
/*
 * In this branch each macro emits the field plus an `unsigned` padding
 * member. The __MIPSEL__ test is narrowed with __mips32 so that 64-bit MIPS
 * no longer lands here.
 * NOTE(review): verify the compiler really predefines __mips32; if it does
 * not, 32-bit mipsel is still caught by the __SIZEOF_LONG__ == 4 test below.
 */
#if defined(__i386__) || (defined(__arm__) && !defined(__ARMEB__)) || \
    defined(__sh__) || defined(__bfin__) || (defined(__MIPSEL__) && defined(__mips32)) || \
    defined(__cris__) || (defined(__riscv) && __riscv_xlen == 32) || \
    (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
         __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG__ == 4)
#define PADDED(x, y)    x; unsigned y
#define PADDEDptr(x, y) x; unsigned y
#define PADDEDul(x, y)  unsigned long x; unsigned y

/* little endian, 64 bits */
/*
 * In this branch PADDEDptr and PADDEDul emit only the field itself, with no
 * padding member. The (__MIPSEL__ && __mips64) test is the addition for
 * mips64el.
 */
#elif defined(__ia64__) || defined(__x86_64__) || defined(__alpha__) ||         \
        (defined(__MIPSEL__) && defined(__mips64)) ||   \
      (defined(__aarch64__) && defined(__AARCH64EL__)) || \
      (defined(__riscv) && __riscv_xlen == 64) || \
      (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
          __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG__ == 8)
#define PADDED(x, y)    x, y
#define PADDEDptr(x, y) x
#define PADDEDul(x, y)  unsigned long x

注意:
在libaio.h中32bit部分是有__MIPSEL__判断的,我们gcc中也有这个宏,所以需要添加其他判断来限制,因为32bit和64bit都有__MIPSEL__,添加__mips32 宏做限制,应用和内核数据对不上就是这块的原因

编译命令

libaio

make clean;make ;make install

fio

make clean;make

需要安装可以执行

make install

验证问题命令

fio -filename=/dev/sdb -direct=1 -iodepth=1 -thread -rw=write -ioengine=libaio -bs=4k -size=10G -numjobs=4 -runtime=120 -group_reporting -name=mytest

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值