1.1 hashlib模块使用
获取文件的MD5值,结果和shell下的md5sum命令一样
方法一:先实例化一个md5对象,再用update()传入数据,最后用hexdigest()查看十六进制的哈希值
In [3]: import hashlib
In [5]: md5 = hashlib.md5()
In [6]: md5.update("a")
In [7]: md5.hexdigest()
Out[7]: '0cc175b9c0f1b6a831c399e269772661'
In [8]: md5.update("b\n") #叠加,实际为ab\n的值
In [9]: md5.hexdigest()
Out[9]: 'daa8075d6ac5ff8d0c6d4650adb4ef29'
说明:
对比shell,哈希值一样
[root@huangzp3 python]# echo "ab"|md5sum
daa8075d6ac5ff8d0c6d4650adb4ef29 -
说明:echo默认会在末尾追加换行符,所以echo "a"的哈希值与Python中"a"的哈希值不同;加上-n(表示不加换行符)后两者一致
[root@huangzp3 python]# echo "a"|md5sum
60b725f10c9c85c70d97880dfe8191b3 -
[root@huangzp3 python]# echo -n "a"|md5sum
0cc175b9c0f1b6a831c399e269772661 -
方法二:字符串短时,直接调用
In [10]: hashlib.md5("hello").hexdigest()
Out[10]: '5d41402abc4b2a76b9719d911017c592'
脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
#!/usr/bin/env python
import
sys
import
hashlib
import
codecs
def getMd5(f):
    """Return the hexadecimal MD5 digest of the file at path ``f``.

    The file is read in binary mode in 4096-byte chunks, so a large file is
    never loaded into memory at once.
    """
    md5 = hashlib.md5()
    # Binary mode: hashlib only accepts bytes, and neither decoding nor
    # newline translation may alter the data being hashed.
    with open(f, "rb") as fd:
        # Every update() extends the running digest with the next chunk;
        # read() returns b"" at end of file, which ends the iteration.
        for data in iter(lambda: fd.read(4096), b""):
            md5.update(data)
    return md5.hexdigest()
if __name__ == "__main__":
    # Only the argv lookup is expected to raise IndexError, so it is the only
    # statement guarded by the try block; the hashing call runs in the else
    # branch where its own errors are not mistaken for a missing argument.
    try:
        path = sys.argv[1]
    except IndexError:
        print("%s follow a argument" % __file__)
    else:
        print(getMd5(path))
|
运行结果:
[root@huangzp3 python]# python 01.py 1.py
e796a8f418fa90d7e4f0a162119f114a
[root@huangzp3 python]# cat 1.py |md5sum
e796a8f418fa90d7e4f0a162119f114a -
1.2 os.walk函数使用
os.walk():递归遍历目录,返回一个生成器,每次迭代产出一个三元组,分别是:路径、路径下的目录列表、路径下的文件列表
In [17]: walk = os.walk("/root/python/a")
In [18]: for i,j,k in walk:print i,j,k
/root/python/a ['b'] ['a.txt']
/root/python/a/b ['c'] ['b.txt']
/root/python/a/b/c [] ['c.txt']
脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
#!/usr/bin/env python
import
sys
import
hashlib
import
codecs
import
os
def getMd5(f):
    """Return the hexadecimal MD5 digest of the file at path ``f``.

    The file is read in binary mode in 4096-byte chunks, so a large file is
    never loaded into memory at once.
    """
    md5 = hashlib.md5()
    # Binary mode: hashlib only accepts bytes, and neither decoding nor
    # newline translation may alter the data being hashed.
    with open(f, "rb") as fd:
        # Every update() extends the running digest with the next chunk;
        # read() returns b"" at end of file, which ends the iteration.
        for data in iter(lambda: fd.read(4096), b""):
            md5.update(data)
    return md5.hexdigest()
if __name__ == "__main__":
    # Print one "<md5> <path>" line for every file found below the directory
    # given as the first command-line argument.
    lines = []
    for dirpath, _dirnames, filenames in os.walk(sys.argv[1]):
        for fil in filenames:
            fa = os.path.join(dirpath, fil)
            lines.append(getMd5(fa) + " " + fa + "\n")
    # Join once at the end instead of growing a string inside the loop. Every
    # entry already ends with a newline, so the text is written out as-is.
    sys.stdout.write("".join(lines))
|
运行结果:
[root@huangzp3 python]# python 02.py /root/python/
b9f6f7a02766b3f9bd28369c6c331218 /root/python/20.py
e796a8f418fa90d7e4f0a162119f114a /root/python/1.py
01b1b4f4b2eb7155187c27057e9a4de2 /root/python/1.pyc
1.3 生成器yield
想拿到函数产生的值,但又不想让函数就此结束时,可以使用yield。yield会返回当前的值并暂停函数,保留执行状态,下次调用时从暂停处继续执行。调用时用next()方法或者for循环遍历
修改如上脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#!/usr/bin/env python
import
sys
import
hashlib
import
os
import
codecs
def getMd5(f):
    """Return the hexadecimal MD5 digest of the file at path ``f``.

    The file is read in binary mode in 4096-byte chunks, so a large file is
    never loaded into memory at once.
    """
    md5 = hashlib.md5()
    # Binary mode: hashlib only accepts bytes, and neither decoding nor
    # newline translation may alter the data being hashed.
    with open(f, "rb") as fd:
        # Every update() extends the running digest with the next chunk;
        # read() returns b"" at end of file, which ends the iteration.
        for data in iter(lambda: fd.read(4096), b""):
            md5.update(data)
    return md5.hexdigest()
def fileMd5(topdir):
    """Generate one "<md5> <path>" string for each file found under ``topdir``.

    This is a generator: results are handed to the caller one at a time while
    the directory tree is being walked.
    """
    for dirpath, _dirnames, filenames in os.walk(topdir):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            yield "%s %s" % (getMd5(full_path), full_path)
if __name__ == "__main__":
    topdir = sys.argv[1]
    # fileMd5() is a generator, so each line is printed as soon as the
    # corresponding file has been hashed.
    for lis in fileMd5(topdir):
        # Parenthesised single-argument print works on Python 2 and Python 3.
        print(lis)
|
1.4 文件md5值的校验
找出目录中内容相同的文件
脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
#!/usr/bin/env python
import
sys
import
hashlib
import
os
import
codecs
def getMd5(f):
    """Return the hexadecimal MD5 digest of the file at path ``f``.

    The file is read in binary mode in 4096-byte chunks, so a large file is
    never loaded into memory at once.
    """
    md5 = hashlib.md5()
    # Binary mode: hashlib only accepts bytes, and neither decoding nor
    # newline translation may alter the data being hashed.
    with open(f, "rb") as fd:
        # Every update() extends the running digest with the next chunk;
        # read() returns b"" at end of file, which ends the iteration.
        for data in iter(lambda: fd.read(4096), b""):
            md5.update(data)
    return md5.hexdigest()
def getDir(topdir):
    """Group the files under ``topdir`` by the MD5 digest of their content.

    Returns a dict whose keys are MD5 hex digests and whose values are lists
    of the file paths that produced that digest. A list holding more than one
    path means those files share the same digest.
    """
    dic = {}
    for dirpath, _dirnames, filenames in os.walk(topdir):
        for lis in filenames:
            fil = os.path.join(dirpath, lis)
            # setdefault() creates the list the first time a digest is seen
            # and returns the existing list afterwards. It replaces the
            # dict.has_key() test, which no longer exists in Python 3.
            dic.setdefault(getMd5(fil), []).append(fil)
    return dic
if __name__ == "__main__":
    result = getDir(sys.argv[1])
    for digest, paths in result.items():
        # Only a digest shared by several files indicates duplicated content.
        if len(paths) > 1:
            # Same "<digest> <list of paths>" output as the Python 2 print
            # statement, written so that it also runs on Python 3.
            print("%s %s" % (digest, paths))
|
运行结果:
[root@huangzp3 python]# python 04.py /root/python
b026324c6904b2a9cb4b88d6d61c81d1 ['./test/a/1.txt', './test/b/2.txt', './test.bak/test/a/1.txt', './test.bak/test/b/2.txt']
1.5 字典排序
字典是无序的
sorted(可迭代对象,cmp,key=根据什么排序,reverse=是否倒序),返回一个排序的列表
说明:key表示关键字;operator.itemgetter(0):根据字典的key值排序,(1)表示根据字典的value排序;reverse=True倒序
x.iteritems()返回一个迭代器,而x.items()返回一个列表;items()与iteritems()的区别和range()与xrange()的区别一样(Python 3中已没有iteritems(),统一使用items())
找出占用空间大的文件
脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#!/usr/bin/env python
import
sys
import
os
import
operator
def getDic(topdir):
    """Map the path of every file under ``topdir`` to its size in bytes.

    A file whose size cannot be read (os.path.getsize raises OSError, as it
    does for a dangling symbolic link) is left out of the result instead of
    aborting the whole scan.
    """
    dic = {}
    for dirpath, _dirnames, filenames in os.walk(topdir):
        for lis in filenames:
            fs = os.path.join(dirpath, lis)
            try:
                dic[fs] = os.path.getsize(fs)
            except OSError:
                # Unreadable entry: skip it and keep scanning the tree.
                continue
    return dic
def _human_size(size):
    """Convert a byte count to the short form used in the report.

    The value is floored to whole K, M or G units (powers of 1024). A count
    of 1024 bytes or fewer is returned unchanged, without a unit suffix.
    """
    if size <= 1024:
        return size
    # Floor division keeps the whole-number results of Python 2's "/" on
    # integers and behaves the same on Python 3.
    kib = int(size) // 1024
    if kib <= 1024:
        return str(kib) + "K"
    mib = kib // 1024
    if mib <= 1024:
        return str(mib) + "M"
    return str(mib // 1024) + "G"


if __name__ == "__main__":
    dic = getDic(sys.argv[1])
    # Sort the (path, size) pairs by size, largest first; items() is available
    # on both Python 2 and Python 3, unlike iteritems().
    sorted_dic = sorted(dic.items(), key=operator.itemgetter(1), reverse=True)
    # Report only the ten largest files.
    for k, v in sorted_dic[:10]:
        print("%s ------> %s" % (k, _human_size(v)))
|
运行结果:
[root@huangzp3 python]# python 05.py /data/program/
/data/program/mongo/data/data27018/local.2 ------> 511M
/data/program/mongo/data/data27018/local.1 ------> 511M
/data/program/mongo/data/data27017/local.1 ------> 511M
/data/program/mongo/data/data27017/local.2 ------> 511M
/data/program/mongo/logs/mongodb-27018.log ------> 488M
/data/program/mysql/lib/libmysqld.a ------> 235M
/data/program/mongo/data/data27018/journal/j._7 ------> 128M
/data/program/mysql/bin/mysqld ------> 95M
/data/program/mysql/bin/mysql_client_test_embedded ------> 87M
/data/program/mysql/bin/mysql_embedded ------> 87M
1.6 python调用外部命令
1)os.system():输出在终端上,捕获不到
In [4]: os.system("ls")
001.sh 10.sh 1.py
2)os.popen():只能捕捉到标准输出,捕捉不到标准错误输出
In [12]: a = os.popen("ls")
In [13]: a.read
a.read a.readline
a.readinto a.readlines
3)os.popen2():返回2个对象,一个标准输入、一个标准输出
In [14]: stdin,stdout = os.popen2("sort")
/usr/bin/ipython:1: DeprecationWarning: os.popen2 is deprecated. Use the subprocess module.
#!/usr/bin/python2
4)os.popen3():返回3个对象,标准输入、标准输出、标准错误输出
In [16]: stdin,stdout,stderr = os.popen3("ls a")
/usr/bin/ipython:1: DeprecationWarning: os.popen3 is deprecated. Use the subprocess module.
#!/usr/bin/python2
5)os.popen4:返回2个对象,pipe_in和pipe_out_err
6)subprocess
subprocess.call('ls -l --color /root',shell=True)
说明:输出不能捕捉到,类似于os.system;命令写成带参数的字符串(如'ls -l')时需要shell=True,不带参数或以列表形式传参(如['ls','-l'])时则不需要shell
subprocess.check_call(['mkdir','/tmp/aaa'])
说明:命令的退出码非0时,check_call会抛出subprocess.CalledProcessError异常
In [19]: subprocess.call("ls -l",shell=True)
total 580868
-rw-r--r-- 1 root root 103 Nov 22 16:23 001.sh
-rw-r--r-- 1 root root 375 Nov 1 03:00 100.sh
-rw-r--r-- 1 root root 47624178 Nov 1 19:35 100.txt
捕捉异常
脚本:
1
2
3
4
5
6
7
|
#!/usr/bin/env python
import
subprocess
try:
    # check_call() raises CalledProcessError when the command exits with a
    # non-zero status, which "exit 1" always does.
    subprocess.check_call("exit 1", shell=True)
except subprocess.CalledProcessError:
    # Ignored on purpose: the example shows the script carrying on after the
    # failed command.
    pass
# Parenthesised single-argument print works on Python 2 and Python 3.
print("hello world")
|
运行结果:
[root@huangzp3 python]# python 06.py
hello world