问题描述:
- 在使用
fio
进行虚拟机磁盘(Ceph
的RBD
,格式化为ext4
文件系统)的IOPS
测试时,发现randread
比预估值高许多;
- 在使用相同参数进行
randwrite
测试之后,再进行randread
时会出现此现象;
- 而使用
dd
构建测试文件后,再进行randread
就不会出现这种情况,IOPS
数值正常。
初步推测,可能fio
的随机是伪随机,导致前后的randwrite
和randread
使用了相同的伪随机序列。文件系统在进行物理块分配时从前往后分配,在逻辑上随机的块实际上是顺序写入物理磁盘,最终的随机读实际上是顺序读,导致IO
被磁盘调度器合并,实际IO
次数变少,所以测试的IOPS
偏大,为此进行详细分析测试。
打印Debug日志
- 打开
fio
的debug
模式,执行测试,输出日志:
$ fio -direct=1 -iodepth=128 -rw=randwrite -ioengine=libaio -bs=4k -size=1G -numjobs=1 -runtime=10 -group_reporting -filename=iotest -name=Rand_Write_Testing --debug=random > rand_write_offset.log
$ fio -direct=1 -iodepth=128 -rw=randread -ioengine=libaio -bs=4k -size=1G -numjobs=1 -runtime=10 -group_reporting -filename=iotest -name=Rand_Read_Testingg --debug=random > rand_read_offset.log
- 查看日志:
$ head -n30 rand_write_offset.log
fio: set debug option random
Rand_Write_Testing: (g=0): rw=randwrite, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=libaio, iodepth=128
fio-3.1
Starting 1 process
random 4057532 off rand 259043585
random 4057532 off rand 3179521932
random 4057532 off rand 3621444214
random 4057532 off rand 2018697059
random 4057532 off rand 1726199243
random 4057532 off rand 3608323581
random 4057532 off rand 1634212905
random 4057532 off rand 1518359867
random 4057532 off rand 3921331707
random 4057532 off rand 287004724
random 4057532 off rand 3673173177
random 4057532 off rand 2796675757
random 4057532 off rand 3988051731
random 4057532 off rand 1060357494
random 4057532 off rand 1685717462
random 4057532 off rand 2400737531
random 4057532 off rand 1891936796
random 4057532 off rand 3455447349
random 4057532 off rand 1553547805
random 4057532 off rand 2660809810
random 4057532 off rand 17263379
random 4057532 off rand 1823528783
random 4057532 off rand 1355450167
random 4057532 off rand 2956359995
random 4057532 off rand 3392712188
random 4057532 off rand 4240594610
$ head -n30 rand_read_offset.log
fio: set debug option random
Rand_Read_Testingg: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=libaio, iodepth=128
fio-3.1
Starting 1 process
random 4057831 off rand 259043585
random 4057831 off rand 3179521932
random 4057831 off rand 3621444214
random 4057831 off rand 2018697059
random 4057831 off rand 1726199243
random 4057831 off rand 3608323581
random 4057831 off rand 1634212905
random 4057831 off rand 1518359867
random 4057831 off rand 3921331707
random 4057831 off rand 287004724
random 4057831 off rand 3673173177
random 4057831 off rand 2796675757
random 4057831 off rand 3988051731
random 4057831 off rand 1060357494
random 4057831 off rand 1685717462
random 4057831 off rand 2400737531
random 4057831 off rand 1891936796
random 4057831 off rand 3455447349
random 4057831 off rand 1553547805
random 4057831 off rand 2660809810
random 4057831 off rand 17263379
random 4057831 off rand 1823528783
random 4057831 off rand 1355450167
random 4057831 off rand 2956359995
random 4057831 off rand 3392712188
random 4057831 off rand 4240594610
- 日志对比,发现两次测试中右侧的随机数(off rand 之后的数值)逐行完全相同,即randwrite 和randread 使用了同一个伪随机序列。
获取Fio源码
- 分析的源码来源和版本如下:
$ git clone https://github.com/axboe/fio.git
$ cd fio
$ git branch -av
* master ee636f3 libaio: switch to newer libaio polled IO API
remotes/origin/HEAD -> origin/master
remotes/origin/latency-probe fcd4e74 target: fixes
remotes/origin/master ee636f3 libaio: switch to newer libaio polled IO API
分析debug选项
- 查找
debug
选项的定义和引用位置:
$ grep -rHn \"debug\"
init.c:176: .name = (char *) "debug",
- 查找
random
参数的定义和引用位置,可以看到random
参数使用FD_RANDOM
宏或者枚举值进行定义:
$ grep -rHn \"random\" -A5 init.c
init.c:2260: { .name = "random",
init.c-2261- .help = "Random generation logging",
init.c-2262- .shift = FD_RANDOM,
init.c-2263- },
init.c-2264- { .name = "parse",
init.c-2265- .help = "Parser logging",
- 查找
FD_RANDOM
这个宏开关的定义和引用位置,可以发现定义在debug.h
中,在io_u.c
中被引用,是用来开关debug
打印的,其中第98
行的打印格式和之前的Debug
日志格式相同:
$ grep -rHn FD_RANDOM
debug.h:13: FD_RANDOM,
init.c:2262: .shift = FD_RANDOM,
io_u.c:98: dprint(FD_RANDOM, "off rand %llu\n", (unsigned long long) r);
io_u.c:124: dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
- 查看
FD_RANDOM
引用处附近的源码,第96
行处就是生成随机数的地方,第98
行对生成的随机数进行打印:
$ grep -rHn FD_RANDOM io_u.c -C12
io_u.c-86-
io_u.c-87-static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
io_u.c-88- enum fio_ddir ddir, uint64_t *b,
io_u.c-89- uint64_t lastb)
io_u.c-90-{
io_u.c-91- uint64_t r;
io_u.c-92-
io_u.c-93- if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE ||
io_u.c-94- td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE64) {
io_u.c-95-
io_u.c-96- r = __rand(&td->random_state);
io_u.c-97-
io_u.c:98: dprint(FD_RANDOM, "off rand %llu\n", (unsigned long long) r);
io_u.c-99-
io_u.c-100- *b = lastb * (r / (rand_max(&td->random_state) + 1.0));
io_u.c-101- } else {
io_u.c-102- uint64_t off = 0;
io_u.c-103-
io_u.c-104- assert(fio_file_lfsr(f));
io_u.c-105-
io_u.c-106- if (lfsr_next(&f->lfsr, &off))
io_u.c-107- return 1;
io_u.c-108-
io_u.c-109- *b = off;
io_u.c-110- }
--
io_u.c-112- /*
io_u.c-113- * if we are not maintaining a random map, we are done.
io_u.c-114- */
io_u.c-115- if (!file_randommap(td, f))
io_u.c-116- goto ret;
io_u.c-117-
io_u.c-118- /*
io_u.c-119- * calculate map offset and check if it's free
io_u.c-120- */
io_u.c-121- if (random_map_free(f, *b))
io_u.c-122- goto ret;
io_u.c-123-
io_u.c:124: dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
io_u.c-125- (unsigned long long) *b);
io_u.c-126-
io_u.c-127- *b = axmap_next_free(f->io_axmap, *b);
io_u.c-128- if (*b == (uint64_t) -1ULL)
io_u.c-129- return 1;
io_u.c-130-ret:
io_u.c-131- return 0;
io_u.c-132-}
io_u.c-133-
io_u.c-134-static int __get_next_rand_offset_zipf(struct thread_data *td,
io_u.c-135- struct fio_file *f, enum fio_ddir ddir,
io_u.c-136- uint64_t *b)
分析dprint函数
- 查找
dprint
函数或者宏的定义及引用处,定义在debug.h
中:
$ grep -rHn " dprint"
debug.h:62:#define dprint(type, str, args...) \
debug.h:71:static inline void dprint(int type, const char *str, ...)
gettime.c:320: dprint(FD_TIME, "tmp=%llu, sft=%u\n", tmp, sft);
io_u.h:153:static inline void dprint_io_u(struct io_u *io_u, const char *p)
io_u.h:170:#define dprint_io_u(io_u, p)
t/time-test.c:88:#define dprintf(...) if (DEBUG) { printf(__VA_ARGS__); }
- 查看
debug.h
中的dprint
定义内容:
$ grep -rHn " dprint" -C7 debug.h
debug.h-55-};
debug.h-56-extern const struct debug_level debug_levels[];
debug.h-57-
debug.h-58-extern unsigned long fio_debug;
debug.h-59-
debug.h-60-void __dprint(int type, const char *str, ...) __attribute__((format (printf, 2, 3)));
debug.h-61-
debug.h:62:#define dprint(type, str, args...) \
debug.h-63- do { \
debug.h-64- if (((1 << type) & fio_debug) == 0) \
debug.h-65- break; \
debug.h-66- __dprint((type), (str), ##args); \
debug.h-67- } while (0) \
debug.h-68-
debug.h-69-#