在 Linux 下用 top 命令查看时发现:用户态 CPU 占用率(3.1%us)很低,但 IO 等待占用率(21.4%wa)非常高。此时怎样获取 IO 等待的相关信息、定位产生 IO 的进程?步骤如下。
top - 15:31:05 up 94 days, 3:09, 6 users, load average: 10.31, 12.89, 14.89
Tasks: 313 total, 1 running, 311 sleeping, 0 stopped, 1 zombie
Cpu(s): 3.1%us, 0.8%sy, 0.0%ni, 74.4%id, 21.4%wa, 0.1%hi, 0.3%si, 0.0%st
Mem: 8118112k total, 7752116k used, 365996k free, 46560k buffers
Swap: 16779852k total, 4124868k used, 12654984k free, 2102300k cached
1. 获取磁盘的IO信息
# iostat -x 1
Linux 2.6.16.54-0.2.12.2849.0.PTF.686599-smp (sdu08) 01/06/14
avg-cpu: %user %nice %system %iowait %steal %idle
1.79 0.00 2.69 3.34 0.00 91.50
Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util
sda 4.00 52.50 13.73 40.16 288.04 226.37 144.02 113.18 9.54 0.26 14.70 2.26 12.17
2. 获取分区的IO信息
# iostat -p 1
avg-cpu: %user %nice %system %iowait %steal %idle
0.25 0.00 1.25 4.75 0.00 93.00
Device: tps Blk_read/s Blk_wrtn/s Blk_read Blk_wrtn
sda 45.00 0.00 1304.00 0 1304
sda1 0.00 0.00 0.00 0 0
sda2 37.00 0.00 296.00 0 296
sda3 0.00 0.00 0.00 0 0
sda5 7.00 0.00 56.00 0 56
sda6 83.00 0.00 664.00 0 664
sda7 8.00 0.00 64.00 0 64
sda8 0.00 0.00 0.00 0 0
sda9 0.00 0.00 0.00 0 0
sda10 0.00 0.00 0.00 0 0
sda11 10.00 0.00 80.00 0 80
sda12 12.00 0.00 96.00 0 96
再用 df 查看各分区(device)对应的挂载点:
# df
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda2 15735128 832504 14902624 6% /
tmpfs 4 0 4 0% /dev/vx
udev 4059056 128 4058928 1% /dev
/dev/sda10 1052184 88272 963912 9% /boot
/dev/sda6 49173404 39756860 9416544 81% /home
/dev/sda5 10490040 9317880 1172160 89% /opt
/dev/sda7 20972152 10358908 10613244 50% /oracle
/dev/sda8 10490040 767396 9722644 8% /tmp
/dev/sda11 5245016 4767284 477732 91% /usr
/dev/sda12 2104376 2104376 0 100% /var
shm 8388608 2472040 5916568 30% /dev/shm
/dev/sda9 10490040 32840 10457200 1% /core
3. 查看一个分区有哪些进程在访问:
# fuser -vm /home
USER PID ACCESS COMMAND
/home: see 386 ..c.. csh
scu 754 ..c.. vsftpd
scu 936 ..c.. csh
see 1840 ..c.. sshd
scu 3158 ..c.. su
scu 3197 ..c.. csh
sne 4754 F.ce. idagent
sne 4780 F.ce. bsbus
4. 获取每一个进程每秒钟的IO读写块的个数:
# xio.py
===================================
pname pid reads write
pdflush 19344 0 1249
bash 9835 4 0
oracle 20054 0 3
该命令会输出每个进程每秒读、写的磁盘块数。每个块的大小可以用 stat 命令获取(见下面输出中的 IO Block 字段,此处为 4096 字节):
# stat .
File: `.'
Size: 2816 Blocks: 5 IO Block: 4096 directory
Device: 802h/2050d Inode: 143 Links: 35
Access: (0755/drwxr-xr-x) Uid: ( 0/ root) Gid: ( 0/ root)
Access: 2014-01-06 14:22:10.000000000 +0800
Modify: 2014-01-06 14:15:49.000000000 +0800
Change: 2014-01-06 14:15:49.000000000 +0800
xio.py 脚本的内容如下:
#!/usr/bin/python
# Monitoring per-process disk I/O activity
# written by http://www.vpsee.com
import sys, os, time, signal, re
class DiskIO:
    """Per-process tally of block-level disk I/O events.

    Attributes:
        pname:  process name as it appears in the kernel log line.
        pid:    process id, stored exactly as given (the monitor passes the
                string captured by its regex, not an int).
        reads:  number of READ events counted so far.
        writes: number of WRITE events counted so far.
    """

    def __init__(self, pname=None, pid=None, reads=0, writes=0):
        """Create a tally for one process.

        Args:
            pname:  process name (default None).
            pid:    process id (default None).
            reads:  initial READ count (default 0).
            writes: initial WRITE count (default 0).
        """
        self.pname = pname
        self.pid = pid
        # Honor the caller-supplied starting counts. Previously both were
        # hard-coded to 0, so the `reads`/`writes` arguments were ignored.
        self.reads = reads
        self.writes = writes
# Kernel block_dump lines handled here look like:
#   "<pname>(<pid>): READ block <n> on <device>"
# Compiled once instead of on every log line.
_BLOCK_DUMP_RE = re.compile(
    r'^(\S+)\((\d+)\): (READ|WRITE) block (\d+) on (\S+)')


def _collect_disk_io(lines):
    """Count READ/WRITE events per process from kernel log lines.

    Args:
        lines: iterable of log lines (e.g. an open file).

    Returns:
        A list of DiskIO objects, one per distinct pid, in the order each
        pid was first seen. Lines that do not match the block_dump pattern
        are skipped.
    """
    stats = []    # keeps first-seen order for display
    by_pid = {}   # pid string -> DiskIO, replaces the per-line list scan
    for line in lines:
        m = _BLOCK_DUMP_RE.match(line)
        if m is None:
            continue
        pname, pid, op = m.group(1), m.group(2), m.group(3)
        entry = by_pid.get(pid)
        if entry is None:
            entry = DiskIO(pname, pid)
            by_pid[pid] = entry
            stats.append(entry)
        # Count this event even when the entry was created just now; the
        # first event of each process used to be dropped.
        if op == "READ":
            entry.reads += 1
        else:  # the regex only admits READ or WRITE
            entry.writes += 1
    return stats


def main():
    """Print per-process disk read/write block counts about once a second.

    Takes no command-line arguments and must run as root. It turns on
    /proc/sys/vm/block_dump, then loops forever: dump the kernel log with
    `dmesg -c` into /tmp/diskio.log, tally the READ/WRITE lines per process,
    sleep one second and print the table. SIGINT is routed to
    signal_handler.
    """
    if len(sys.argv) != 1:
        print("usage: ./iotop")
        sys.exit(0)
    if os.getuid() != 0:
        print("must be run as root")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    os.system('echo 1 > /proc/sys/vm/block_dump')
    print("TASK PID READ WRITE")

    while True:
        os.system('dmesg -c > /tmp/diskio.log')
        f = open('/tmp/diskio.log', 'r')
        try:
            stats = _collect_disk_io(f)
        finally:
            # Close on every pass; the handle used to leak once per loop.
            f.close()

        time.sleep(1)
        print("\n=======================================================")
        print("%-16s %10s %10s %10s" % ("pname", "pid", "reads", "write"))
        for item in stats:
            print("%-16s %10s %10d %10d" %
                  (item.pname, item.pid, item.reads, item.writes))
def signal_handler(signal, frame):
    """Handle SIGINT: switch block_dump back off, then exit with status 0.

    Args:
        signal: signal number passed by the runtime (unused).
        frame:  current stack frame passed by the runtime (unused).

    NOTE: the `signal` parameter shadows the imported `signal` module inside
    this function; the body does not use the module.
    """
    disable_cmd = 'echo 0 > /proc/sys/vm/block_dump'
    os.system(disable_cmd)
    raise SystemExit(0)
# Start the monitor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()