//注意Linux主机
//删除指定目录中文件名以 .tmp 为后缀的文件
#!/usr/bin/python
"""Delete the files in a directory whose names end with the .tmp suffix."""
import glob
import os


def remove_tmp_files(dirname='/tmp', suffix='.tmp'):
    """Remove files directly inside ``dirname`` whose names end with ``suffix``.

    Removal is best effort: an entry that cannot be removed (for example a
    directory, or a file the user may not delete) is skipped.

    Returns the list of paths that were actually removed.
    """
    removed = []
    for filename in glob.glob(os.path.join(dirname, '*')):
        # Compare against the full suffix including the dot, so that a name
        # such as "footmp" is not deleted by mistake.
        if not filename.endswith(suffix):
            continue
        try:
            os.remove(filename)
        except OSError:
            continue
        removed.append(filename)
    return removed


if __name__ == '__main__':
    remove_tmp_files('/tmp')
//输出指定文件夹(包括子文件夹)中最大的两个文件
#!/usr/bin/python
"""Print the two largest files found in a directory tree."""
import os


def largest_files(dirname='/tmp', count=2):
    """Return the ``count`` largest files under ``dirname`` (subfolders included).

    The result is a list of ``(size_in_bytes, full_path)`` tuples sorted in
    ascending order, so the largest file comes last.
    """
    if count <= 0:
        return []
    allsize = []
    for root, _dirs, files in os.walk(dirname):
        for filename in files:
            fullname = os.path.join(root, filename)
            try:
                filesize = os.path.getsize(fullname)
            except OSError:
                # Dangling symlink, or the file vanished while walking.
                continue
            allsize.append((filesize, fullname))
    allsize.sort()
    return allsize[-count:]


if __name__ == '__main__':
    print(largest_files('/tmp'))
//删除指定文件夹(包括子文件夹)中的所有文件及文件夹
import os


def remove_tree_contents(top):
    """Delete every file and folder below ``top``; ``top`` itself is kept.

    CAUTION: this is destructive. Passing '/' would try to delete every
    file on the disk.
    """
    # Walk bottom-up so each directory is already empty when os.rmdir runs.
    for root, dirs, files in os.walk(top, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            path = os.path.join(root, name)
            if os.path.islink(path):
                # os.walk lists symlinks to directories in `dirs`; os.rmdir
                # fails on them, so unlink the link itself instead.
                os.remove(path)
            else:
                os.rmdir(path)
//删除指定文件夹中的文件序列
#!/usr/bin/python
"""Delete a numbered file sequence such as shipA####.tif for a frame range."""
import errno
import os
import re

# The file name must contain exactly one contiguous run of '#'; its length is
# the zero-padding width of the frame number.
_SEQUENCE_NAME = re.compile(r'^([^#]*)(#+)([^#]*)$')


def sequence_frame_paths(sequence, start, end):
    """Expand a ``####`` pattern into the paths of frames ``start``..``end``.

    ``sequence`` is a path whose file name holds a run of '#' characters,
    e.g. '/render/shipA####.tif'. Surrounding whitespace is ignored.

    Raises ValueError when the file name has no single run of '#'.
    """
    directory, name = os.path.split(sequence.strip())
    match = _SEQUENCE_NAME.match(name)
    if match is None:
        raise ValueError(
            "sequence file name must contain one run of '#': %r" % name)
    prefix, hashes, suffix = match.groups()
    padding = len(hashes)
    return [
        os.path.join(directory, prefix + str(index).zfill(padding) + suffix)
        for index in range(start, end + 1)
    ]


def delete_sequence(sequence, start, end):
    """Remove frames ``start``..``end`` of ``sequence``; return how many were removed.

    Frames that do not exist are skipped so a gap in the sequence does not
    stop the remaining frames from being deleted. Other OS errors propagate.
    """
    removed = 0
    for file_path in sequence_frame_paths(sequence, start, end):
        try:
            os.remove(file_path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
        else:
            removed += 1
    return removed


def main():
    """Prompt for the sequence pattern and frame range, then delete the frames."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input
    sequence = read_line('Sequence:')
    sequence_start = int(read_line('Start_Frame:'))
    sequence_end = int(read_line('End_Frame:'))
    delete_sequence(sequence, sequence_start, sequence_end)


if __name__ == '__main__':
    main()
运行后分别输入:
/srv/scratch/houdini_zpeng/Render/ship/shipA####.tif
101
300
会把序列文件shipA####.tif 从101-300帧删除
//找到文件的绝对路径
1 >>> os.path.abspath('memo.txt') 2 '/Users/csev/memo.txt'
// 判断路径是否为文件夹(os.path.isdir 对普通文件或不存在的路径返回 False;判断路径是否存在请用 os.path.exists)
1 >>> os.path.isdir('memo.txt') 2 False 3 >>> os.path.isdir('music') 4 True
//返回给定目录地址的文件夹和文件列表:
1 >>> os.listdir(cwd) 2 ['music', 'photos', 'memo.txt']
//得到文件夹(包含子文件夹)中txt文件的数量
import os


def count_files(top='.', suffix='.txt'):
    """Count the files under ``top`` (subfolders included) whose names end with ``suffix``."""
    count = 0
    for _dirname, _dirs, files in os.walk(top):
        count += sum(1 for filename in files if filename.endswith(suffix))
    return count


if __name__ == '__main__':
    # '.' is the current working directory.
    print('Files: %d' % count_files('.'))
OUTPUT:
python txtcount.py
Files: 1917
os.walk 会递归的进入所有的文件夹和子文件夹,'.'字符表示当前文件夹。
//从命令行读取参数
import sys

# sys.argv is a list of strings: the program name first, then the arguments
# that followed it on the command line.
print('Count: %d' % len(sys.argv))
print('Type: %s' % type(sys.argv))
for arg in sys.argv:
    print('Argument: %s' % arg)
sys.argv 内容是字符串类型的列表,列表的第一个是程序的名字,剩下的是在command line中程序名字后面的参数
//程序argtest从command line 接受多个参数
1 python argtest.py hello there
OUTPUT:
Count: 3 Type: <type 'list'> Argument: argtest.py Argument: hello Argument: there
//sys.argv的另一个例子
import sys


def count_bytes(name):
    """Return the size in bytes of the file called ``name``.

    The file is opened in binary mode so the count is in bytes, and the
    ``with`` block closes the handle even if reading fails.
    """
    with open(name, 'rb') as handle:
        return len(handle.read())


if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Give a usage hint instead of an IndexError traceback.
        sys.exit('Usage: python argfile.py <file>')
    name = sys.argv[1]
    print('%s is %d bytes' % (name, count_bytes(name)))
OUTPUT:
1 python argfile.py mbox-short.txt 2 mbox-short.txt is 94626 bytes
//PIPE
//任何从Shell启动的程序(process),你都可以利用Python的"Pipe"来启动。A pipe is an object that represents a running process.
//利用 os.popen 启动 ls:
1 >>> cmd = 'ls -l' 2 >>> fp = os.popen(cmd)
os.popen的参数是一个字符串,包含一个命令,返回值是一个 file pointer, 类似 open file. 你可以利用readline(每次读一行)或者read(一下全部读完),来读取ls -l 进程(process)的执行结果。
>>> res = fp.read()
//完成操作后,你可以类似file的关闭,关闭pipe
>>> stat = fp.close() >>> print stat
None
fp.close()返回ls 进程(process)的执行状态,None 表示 正常关闭(没有错误)
//抓取一个网页,分析数据,其基本数据形式如下:
<tr><td>Modu</td><td><span class="comments">90</span></td></tr> <tr><td>Kenzie</td><td><span class="comments">88</span></td></tr> <tr><td>Hubert</td><td><span class="comments">87</span></td></tr>
//抓取span标签中的数字,把个数和总和计算出来
import urllib
from BeautifulSoup import BeautifulSoup

url = raw_input("Enter -")
html = urllib.urlopen(url).read()
soup = BeautifulSoup(html)

# `total` rather than `sum`, so the builtin sum() is not shadowed.
total = 0
count = 0
for span in soup("span"):
    # A span with no children has nothing to read; skip it rather than
    # raising IndexError on contents[0].
    if not span.contents:
        continue
    # Text inside the tag. All attributes are available as span.attrs; one
    # attribute can be read with span.get('class', None), which returns None
    # when the attribute does not exist.
    value = span.contents[0]
    if value:
        count += 1
        total += int(value)

print("Count %s" % count)
print("Sum %s" % total)
//输入网址:http://python-data.dr-chuck.net/comments_217954.html,得到如下数据
Enter - http://python-data.dr-chuck.net/comments_217954.html Count 50 Sum 2591
//newList = [do something for variable in old list if condition]
# Pattern: new_list = [expression for variable in old_list if condition]
mixedList = [1, 2, 3, 'a', 'b', 'c']

# Keep only the string elements and upper-case each one.
upperCaseList = [
    element.upper()
    for element in mixedList
    if isinstance(element, str)
]

# In an interactive session this echoes ['A', 'B', 'C'].
upperCaseList
//Stack Overflow上一个比较好的groupby例子
from operator import itemgetter
from itertools import groupby

data1 = [1, 2, 3, 10, 11, 100, 9999]
# Written as explicit octal. A literal with leading zeros such as 0010 is
# octal in Python 2 (0010 == 8) and a syntax error in Python 3, so the values
# really are 1, 2, 3, 8, 9, 64, 9999.
data2 = [0o0001, 0o0002, 0o0003, 0o0010, 0o0011, 0o0100, 9999]
data3 = ['image_0001', 'image_0002', 'image_0003', 'image_0010', 'image_0011',
         'image_0011-2', 'image_0011-3', 'image_0100', 'image_9999']


def consecutive_runs(values):
    """Split ``values`` into lists of consecutive integers.

    Within a run of consecutive numbers, position minus value is constant,
    so grouping on that difference separates the runs.
    """
    return [
        list(map(itemgetter(1), run))
        for _key, run in groupby(enumerate(values), lambda pair: pair[0] - pair[1])
    ]


list1 = consecutive_runs(data1)
print('data1')
print(list1)

list2 = consecutive_runs(data2)
print('\ndata2')
print(list2)
运行结果:
data1
[[1, 2, 3], [10, 11], [100], [9999]]
data2
[[1, 2, 3], [8, 9], [64], [9999]]
#!/usr/bin/env python
"""Collapse runs of consecutively numbered names into ``prefix[first-last]``."""
import itertools
import re

DATA = ["image_0001", "image_0002", "image_0003",
        "image_0010", "image_0011",
        "image_0011-1", "image_0011-2", "image_0011-3",
        "image_0100", "image_9999"]


def extract_number(name):
    """Return the digits at the end of ``name`` as a string, leading zeros kept.

    Raises IndexError when ``name`` does not end with a digit.
    """
    return re.findall(r"\d+$", name)[0]


def collapse_group(group):
    """Render one run of ``(index, name)`` pairs as a single string.

    A run of one name is returned unchanged. Otherwise the digits that vary
    are replaced by ``[first-last]``, e.g. ``image_000[1-3]``.
    """
    if len(group) == 1:
        return group[0][1]
    first_name = group[0][1]
    first = extract_number(first_name)
    last = extract_number(group[-1][1])

    # Number of significant digits in the last number of the run; that many
    # trailing characters are moved out of the prefix and into the brackets.
    length = len(str(int(last)))

    return "%s[%s-%s]" % (first_name[:-length], first[-length:], last[-length:])


def _run_key(pair):
    """Group key: position minus trailing number, constant within a run."""
    index, name = pair
    return index - int(extract_number(name))


groups = [collapse_group(tuple(group))
          for _key, group in itertools.groupby(enumerate(DATA), _run_key)]
for entry in groups:
    print(entry)
输出结果:
1 image_000[1-3] 2 image_00[10-11] 3 image_0011-[1-3] 4 image_0100 5 image_9999
// format 格式
字典的用法
robot = dict(
    name="Blender",
    numCPU=160,
    version="3.01c",
    memory=64,
    releaseDate=2020,
)
# **robot passes the dict entries as keyword arguments, so each {key}
# placeholder is filled from the matching dictionary key.
message = (
    "Here is new robot - {name},it has {numCPU} CPU's, {memory} GB of memory,\n "
    "OS version: {version},it will be available in year {releaseDate}."
).format(**robot)
print(message)
显示
1 Here is new robot - Blender,it has 160 CPU's, 64 GB of memory, 2 OS version: 3.01c,it will be available in year 2020.
list 用法
user = ["Peng", "age", "Male"]
# *user unpacks the list into positional arguments for {0}, {1} and {2}.
summary = "User name is {0}, Age:{1},Sex:{2}".format(*user)
print(summary)
输出显示:
1 User name is Peng, Age:age,Sex:Male
format 填充
# {0:#^30} centres argument 0 in 30 columns padded with '#';
# {1:.^30} centres argument 1 in 30 columns padded with '.'.
print("{0:#^30}\n{1:.^30}".format("", "HELLO"))
输出:
1 ############################## 2 ............HELLO.............
{1:.^30} : 1表示取"HELLO", .是填充字符,^表示居中对齐,30 表示宽度
def nice_print():
    """Print the squares of 1..10, one per line.

    Each line shows the number, the same number right-aligned in 10 columns
    padded with '.', and then ``*n = n*n``.
    """
    rows = ["{0}{0:.>10}*{0} = {1} \n".format(x, x * x) for x in range(1, 11)]
    print("".join(rows))


nice_print()
输出:
1 1.........1*1 = 1 2 2.........2*2 = 4 3 3.........3*3 = 9 4 4.........4*4 = 16 5 5.........5*5 = 25 6 6.........6*6 = 36 7 7.........7*7 = 49 8 8.........8*8 = 64 9 9.........9*9 = 81 10 10........10*10 = 100
^、<、>分别是居中、左对齐、右对齐,后面带宽度
'{:>8}'.format('189')
输出:
'     189'
'{:0>8}'.format('189')
输出:
'00000189'
'{:a>8}'.format('189')
输出:
'aaaaa189'