9–1. 文件过滤. 显示一个文件的所有行, 忽略以井号( # )开头的行. 这个字符被用做Python , Perl, Tcl, 等大多脚本文件的注释符号.附加题: 处理不是第一个字符开头的注释.
# Exercise 9-1: print test.txt with its '#' comments removed.
# Lines holding nothing but a comment are dropped; for a trailing comment
# only the text in front of the first '#' is shown.
with open('test.txt', 'r') as f:  # 'with' closes the file even on error
    for eachline in f:
        code, marker, _comment = eachline.partition('#')
        if marker and not code.strip():
            # Only whitespace precedes the '#': the whole line is a comment.
            continue
        # print() supplies the newline, so drop the one read from the file
        # to avoid double spacing.
        print(code.rstrip('\n'))
# Print the first N lines of a file chosen by the user.
N = int(raw_input('pls input a number:'))
F = raw_input('pls input a file name:')
with open(F, 'r') as f:
    # Stream the file and stop after N lines: a file shorter than N no
    # longer raises IndexError, and the remainder is never read.
    for i, line in enumerate(f):
        if i >= N:
            break
        print(line.rstrip('\n'))
# Print the number of lines in a file chosen by the user.
F = raw_input('pls input a file name:')
with open(F, 'r') as f:
    # Count while streaming instead of loading every line into a list.
    print(sum(1 for _line in f))
写一个逐页显示文本文件的程序. 提示输入一个文件名, 每次显示文本文件的 25 行, 暂停并向用户提示"按任意键继续.", 按键后继续执行.
- import os
# Page through a text file, pausing after every 25 lines.
F = raw_input('pls input a file name:')
with open(F, 'r') as f:
    for n, i in enumerate(f, 1):
        print(i.rstrip('\n'))
        if n % 25 == 0:
            if os.name == 'nt':
                # 'pause' is a cmd.exe built-in, so it only exists on Windows.
                os.system('pause')
            else:
                # Portable fallback for every other platform.
                raw_input('press Enter to continue...')
def grade(score):
    """Return the letter grade for an integer *score*.

    0-59 is 'F', 60-69 'D', 70-79 'C', 80-89 'B' and 90-100 'A'.
    Returns None when the score lies outside 0..100.
    """
    if not 0 <= score <= 100:
        return None
    if score < 60:
        return 'F'
    # Each band is 10 points wide; min() folds 100 into the 'A' band.
    return 'DCBA'[min((score - 60) // 10, 3)]


if __name__ == '__main__':
    # Grade every score in test.txt (one per line), then print the average.
    scores = []
    with open('test.txt', 'r') as f:
        for i in f:
            text = i.strip()
            try:
                letter = grade(int(text))
            except ValueError:
                # Blank or non-numeric line: report it like an invalid score.
                letter = None
            if letter is None:
                print('score wrong,please input again %s' % text)
                continue
            scores.append(int(text))
            print('score is %s %s' % (letter, text))
    if scores:
        # True division: '//' truncated the average before formatting it.
        print('average score is %f' % (float(sum(scores)) / len(scores)))
    else:
        print('no valid score found')
def first_difference(lines1, lines2):
    """Locate the first difference between two lists of lines.

    Returns a 1-based ``(row, column)`` tuple, or None when both lists are
    identical. If one line is a prefix of the other, the column is the
    position just past the shorter line. If one list merely has extra
    lines, the row is the first extra line and the column is 1.
    """
    for row, (a, b) in enumerate(zip(lines1, lines2), 1):
        if a == b:
            continue
        for col, (x, y) in enumerate(zip(a, b), 1):
            if x != y:
                return row, col
        # The common part matched, so the lines differ only in length.
        return row, min(len(a), len(b)) + 1
    if len(lines1) != len(lines2):
        return min(len(lines1), len(lines2)) + 1, 1
    return None


if __name__ == '__main__':
    # Compare two files and report where they first differ.
    F1 = raw_input('pls input a file name:')
    F2 = raw_input('pls input a file name:')
    with open(F1, 'r') as f1:
        f1alllines = f1.readlines()
    with open(F2, 'r') as f2:
        f2alllines = f2.readlines()
    difference = first_difference(f1alllines, f2alllines)
    if difference is None:
        print('2 files equal')
    else:
        print('row is %d ' % difference[0])
        print('column is %d ' % difference[1])
# Parse win.ini into {section name: ['key=value', ...]} and print the result.
option = {}
name = None  # current section; None until the first '[section]' line
with open(r'c:\windows\win.ini') as f:
    for line in f:
        if line.startswith(';'):
            # ';' starts a comment line.
            continue
        if line.startswith('['):
            name = line[1:line.rfind(']')]
            option.setdefault(name, [])
            continue
        # A key before any section header has no section to belong to; it
        # is skipped (it used to raise NameError).
        if '=' in line and name is not None:
            option[name].append(line.strip())
print(option)
# List every attribute of a module named by the user.
import importlib

m = raw_input('pls input a module name: ')
# import_module returns the named module itself; __import__('a.b') would
# return the top-level package 'a' instead.
module = importlib.import_module(m)
ml = dir(module)
print(ml)
for i in ml:
    value = getattr(module, i)
    print('name:  %s' % i)
    print('type:  %s' % (type(value),))
    # One-element tuple so that a tuple-valued attribute formats as one item.
    print('value:  %s' % (value,))
进入 Python 标准库所在的目录. 检查每个 .py 文件看是否有__doc__ 字符串, 如果有, 对其格式进行适当的整理归类. 你的程序执行完毕后, 应该会生成一个漂亮的清单. 里边列出哪些模块有文档字符串, 以及文档字符串的内容. 清单最后附上那些没有文档字符串模块的名字.附加题: 提取标准库中各模块内全部类(class)和函数的文档.
- import os
# Report which standard-library modules have a module docstring.
import ast

# Directory of the running interpreter's standard library (where os.py lives).
path = os.path.dirname(os.__file__)
pymodules = {}
for f in sorted(os.listdir(path)):
    if not f.endswith('.py'):
        continue
    module = f[:-3]
    with open(os.path.join(path, f)) as fobj:
        source = fobj.read()
    try:
        # Let the parser find the docstring: it copes with every quoting
        # style and with one-line docstrings, unlike scanning for '"""'.
        doc = ast.get_docstring(ast.parse(source))
    except SyntaxError:
        # Some bundled files are deliberately unparsable.
        doc = None
    pymodules[module] = doc or ''

hasdoc = [module for module in sorted(pymodules) if pymodules[module]]
nodoc = [module for module in sorted(pymodules) if not pymodules[module]]

print('module has no doc:')
print(' '.join(nodoc))
print('\n')
print('module has doc:')
for key in hasdoc:
    print('module: %s' % key)
    print('doc: %s' % pymodules[key])
9–11. Web 站点地址.
a) 编写一个 URL 书签管理程序. 使用基于文本的菜单, 用户可以添加, 修改或者删除书签数据项. 书签数据项中包含站点的名称, URL 地址, 以及一行简单说明(可选). 另外提供检索功能,可以根据检索关键字在站点名称和 URL 两部分查找可能的匹配. 程序退出时把数据保存到一个磁盘文件中去; 再次执行时候加载保存的数据.
b)改进 a) 的解决方案, 把书签输出到一个合法且语法正确的 HTML 文件(.html 或 htm )中,这样用户就可以使用浏览器查看自己的书签清单. 另外提供创建"文件夹"功能, 对相关的书签进行分组管理.
附加题: 请阅读 Python 的 re 模块了解有关正则表达式的资料, 使用正则表达式对用户输入的 URL 进行验证.
- import re,os
# Compiled once at import time instead of on every checkurl() call.
_URL_RE = re.compile(
    r'^(?:http|ftp)s?://'  # scheme: http, https, ftp or ftps
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # ...or localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or dotted IPv4 address
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)


def checkurl(url):
    """Return True if *url* is a well-formed http(s)/ftp(s) URL, else False.

    The host may be a domain name, ``localhost`` or a dotted IPv4 address,
    optionally followed by a port and a path or query string.
    """
    return _URL_RE.match(url) is not None
def geturl():
    """Prompt for one bookmark and return ``(name, url, mark, folder)``.

    The URL is asked for again until checkurl() accepts it. No field may
    contain ';' because that is the field separator of the bookmark file.
    An empty folder answer becomes 'default'.
    """
    def ask(prompt):
        # Re-prompt until the answer is free of the ';' separator.
        while True:
            text = raw_input(prompt).strip()
            if ';' not in text:
                return text
            print("';' is not allowed,pls input again")

    name = ask('pls input a url name:')
    while True:
        url = raw_input('pls input a url address:')
        if checkurl(url) and ';' not in url:
            break
        print('wrong url format,pls input again')
    mark = ask('pls input a url mark:')
    # An empty folder name cannot be created on disk later on.
    folder = ask('pls input a url folder:') or 'default'
    return (name, url, mark, folder)
def load(filename):
    """Return the lines of the bookmark file, or [] if it does not exist.

    The file is opened read-only. The previous 'a+' mode left the read
    position platform-dependent, so existing bookmarks could come back as
    an empty list.
    """
    if not os.path.exists(filename):
        return []
    with open(filename, 'r') as f:
        return f.readlines()
def save(bmlist, filename):
    """Write every non-empty bookmark line of *bmlist* to *filename*.

    Existing content of the file is replaced. The file is closed even if a
    write fails.
    """
    with open(filename, 'w') as f:
        f.writelines(line for line in bmlist if line)
def add(bmlist, name, url, mark, folder='default'):
    """Append a ``name;url;mark;folder`` record to *bmlist* unless present.

    Records end with '\\n' rather than os.linesep. Lines read back from a
    text-mode file always end with '\\n', so an os.linesep terminator made
    the duplicate check fail on Windows and was written as '\\r\\r\\n'.
    """
    bookmark = ';'.join((name, url, mark, folder)) + '\n'
    if bookmark not in bmlist:
        bmlist.append(bookmark)
def modify(bmlist, index, name, url, mark, folder):
    """Replace the bookmark at *index* with a new record.

    Raises IndexError if *index* is out of range. The previous version
    assigned an undefined name (``bk``) and always raised NameError.
    """
    # '\n' (not os.linesep) matches lines read back from a text-mode file.
    bmlist[index] = ';'.join((name, url, mark, folder)) + '\n'
def delbm(bmlist, index):
    """Remove the bookmark at position *index* from *bmlist* in place.

    Raises IndexError if *index* is out of range.
    """
    del bmlist[index]
def findbk(bmlist, fname, furl):
    """Return the index of the first bookmark matching a keyword, or -1.

    A record matches when the non-empty *fname* occurs in its name or the
    non-empty *furl* occurs in its URL. Empty keywords never match.
    Lines that do not have exactly four ';'-separated fields are skipped
    instead of raising ValueError.
    """
    for i, item in enumerate(bmlist):
        fields = item.split(';')
        if len(fields) != 4:
            continue
        name, url = fields[0], fields[1]
        if (fname and fname in name) or (furl and furl in url):
            return i
    return -1
def output2html(bmlist):
    """Write each bookmark to ``<folder>/<name>.html`` under the current dir.

    Every record in *bmlist* is a ``name;url;mark;folder`` line. Folders
    are created on demand, so several bookmarks may share one folder.
    """
    def esc(text):
        # Minimal HTML escaping for text nodes and quoted attribute values.
        return (text.replace('&', '&amp;').replace('<', '&lt;')
                .replace('>', '&gt;').replace('"', '&quot;'))

    fmt = '%d\t%s\t<a href="%s">%s</a>\t%s\t%s<br>'
    for i, item in enumerate(bmlist):
        name, url, mark, folder = [part.strip() for part in item.split(';')]
        # os.mkdir raised OSError on the second bookmark of the same folder.
        if not os.path.isdir(folder):
            os.makedirs(folder)
        with open(os.path.join(folder, name + '.html'), 'w') as f:
            f.write('<html><head><title>bookmark</title></head><body>')
            # The stored URL already carries its scheme; prefixing 'http:\\'
            # produced a broken link.
            f.write(fmt % (i + 1, esc(name), esc(url), esc(url),
                           esc(mark), esc(folder)))
            f.write('</body></html>')
def _ask_index(bmlist):
    """Prompt for a bookmark index; return it, or None if it is unusable."""
    try:
        index = int(raw_input('bookmark index:'))
        bmlist[index]  # probe only: raises IndexError when out of range
    except (ValueError, IndexError):
        print('invalid bookmark index')
        return None
    return index


# Text menu of the bookmark manager; bookmarks are saved to url.txt on quit.
bmlist = load(r'url.txt')
print(bmlist)
while True:
    print('0. quit')
    print('1. add a url bookmark')
    print('2. modify a url bookmark')
    print('3. delete a url bookmark')
    print('4. find a url bookmark')
    print('5. output url bookmark as html')
    print('\n')
    # raw_input + int() instead of Python 2's input(), which eval()s
    # whatever the user types.
    try:
        iInput = int(raw_input("please input operation num: "))
    except ValueError:
        iInput = -1
    if iInput == 0:
        save(bmlist, r'url.txt')
        break
    elif iInput < 0 or iInput > 5:
        print('Error input operation, try agin. 0 operation is quit\n')
        continue
    elif iInput == 1:
        data = geturl()
        add(bmlist, *data)
        print(bmlist)
    elif iInput == 2:
        index = _ask_index(bmlist)
        if index is not None:
            data = geturl()
            modify(bmlist, index, *data)
            print(bmlist)
    elif iInput == 3:
        index = _ask_index(bmlist)
        if index is not None:
            delbm(bmlist, index)
            print(bmlist)
    elif iInput == 4:
        name = raw_input('url name:')
        url = raw_input('url address:')
        index = findbk(bmlist, name, url)
        if index == -1:
            print('not found')
        else:
            print(bmlist[index])
    elif iInput == 5:
        output2html(bmlist)
a)数据应保存在磁盘中,使用冒号(:)分隔,一次写入一行,例如“Joe:boohoo:953176591.145”,文件中数据的行数应该等于你系统上的用户数。
b)进一步改进你的程序,不再一次写入一行,而使用pickle模块保存整个数据对象。请参阅pickle模块的文档了解如何序列化/扁平化对象,以及如何读写保存的对象。一般来说,这个解决方案的代码行数要比a)少;
c)使用shelve模块替换pickle模块,由于可以省去一些维护代码,这个解决方案的代码比b)的更少。
- from datetime import datetime
- import hashlib,os
- import pickle as p
- import shelve as s
# In-memory account table: login name -> [md5 hex digest, last login time].
db = {}


def newuser():
    """Register a new account in ``db`` after prompting for name and password.

    The name is lower-cased and must be alphanumeric and not yet taken;
    the prompt is repeated until both hold.
    """
    prompt = 'login name desired again: '
    while True:
        name = raw_input(prompt).lower()
        if not name.isalnum():
            print('name format error')
        elif name in db:
            prompt = 'name taken,try another: '
        else:
            break
    # NOTE(review): unsalted MD5 is weak for password storage; it is kept
    # so that the stored digests stay comparable with olduser().
    digest = hashlib.md5(raw_input('login passwd desired: ')).hexdigest()
    db[name] = [digest, datetime.now()]
    print('new user is %s, register time is %s' % (name, db[name][1]))
def olduser():
    """Log an existing user in after prompting for name and password.

    A successful login within four hours of the previous one only reports
    the earlier timestamp; otherwise the stored login time is refreshed.
    An unknown name is reported as 'login incorrect' (it used to raise
    TypeError because ``db.get`` returned None).
    """
    name = raw_input('login name desired again: ').lower()
    pwd = raw_input('login passwd desired: ')
    record = db.get(name)
    if record is None or record[0] != hashlib.md5(pwd).hexdigest():
        print('login incorrect')
        return
    newtime = datetime.now()
    elapsed = newtime - record[1]
    # 14400 seconds = 4 hours.
    if elapsed.days == 0 and elapsed.seconds < 14400:
        print('you already logged in at %s: ' % (record[1]))
    else:
        record[1] = newtime
        print('welcome back %s, login time is %s' % (name, record[1]))
def removeuser():
    """Show ``db``, prompt for a user name, and delete that account.

    Prints 'input error' when the (lower-cased) name is not in ``db``.
    """
    print(db)
    name = raw_input('input a user name to remove: ').lower()
    if name not in db:
        print('input error')
        return
    del db[name]
def userlogin():
    """Route a user to login (known name) or to registration (unknown name).

    Prompts repeat until the name is alphanumeric. For an unknown name the
    user is asked whether to register; any answer other than y/n starts
    over from the name prompt.
    """
    while True:
        name = raw_input('login name desired: ').lower()
        if not name.isalnum():
            print('name format error')
            continue
        if name in db:
            print('user name is already in db')
            olduser()
            return
        print('user name is not in db')
        answer = raw_input('register a new user? y/n').lower()
        if answer == 'y':
            newuser()
            return
        if answer == 'n':
            return
def outputA():
    """Print ``db`` and dump it to account.txt as ``name:digest:time`` lines."""
    print(db)
    with open('account.txt', 'w') as f:
        for key in db:
            # '\n' is the right terminator for a text-mode file; os.linesep
            # would be written as '\r\r\n' on Windows.
            f.write('%s:%s:%s\n' % (key, db[key][0], db[key][1]))
def outputB():
    """Pickle ``db`` to pickle.data, read it back, and print the copy."""
    accountfile = 'pickle.data'
    # Pickle data is binary: use 'wb'/'rb' and close both handles.
    with open(accountfile, 'wb') as f:
        p.dump(db, f)
    with open(accountfile, 'rb') as f:
        accountdb = p.load(f)
    print(accountdb)
def outputC():
    """Store ``db`` in shelve.data under the key 'data' and print it back."""
    accountfile = 'shelve.data'
    accountdb = s.open(accountfile, 'c')
    try:
        accountdb['data'] = db
    finally:
        accountdb.close()
    # Reopen read-only to show what was persisted. This handle was never
    # closed before.
    accountdb = s.open(accountfile, 'r')
    try:
        print(accountdb['data'])
    finally:
        accountdb.close()
def adminlogin():
    """Authenticate the administrator, then offer to dump all accounts.

    Prompts repeat until the name/password pair is accepted. The dump goes
    through outputC() (shelve); outputA() and outputB() are the text and
    pickle alternatives.
    """
    # NOTE(review): the administrator credentials are hard-coded below.
    authenticated = False
    while not authenticated:
        name = raw_input('login name desired: ').lower()
        if not name.isalnum():
            print('name format error')
            continue
        pwd = raw_input('login passwd desired: ')
        authenticated = (name == 'root' and pwd == 'root')
        if authenticated:
            print('welcom admin')
        else:
            print('user name or passwd is wrong,input again')
    if not db:
        print('there is nothing you can do')
        return
    answer = raw_input('output all account? y/n').lower()
    if answer == 'y':
        outputC()
    elif answer == 'n':
        print('bye')
def showmenu():
    """Run the top-level menu loop until the user quits.

    Only the first character of the answer counts, case-insensitively.
    End-of-file or Ctrl-C at the prompt is treated as 'q'. An empty answer
    is reported as invalid.
    """
    prompt = """
(A)dmin Login
(U)ser Login
(R)emove a existing user
(Q)uit
Enter choice:"""
    actions = {'r': removeuser, 'u': userlogin, 'a': adminlogin}
    while True:
        try:
            # [:1] rather than [0]: an empty answer must not raise IndexError.
            choice = raw_input(prompt).strip()[:1].lower()
        except (EOFError, KeyboardInterrupt):
            choice = 'q'
        print('\nYou picked: [%s]' % choice)
        if choice == 'q':
            break
        if choice not in actions:
            print('invalid option,try again')
            continue
        actions[choice]()


if __name__ == '__main__':
    showmenu()
a) 什么是命令行参数, 它们有什么用?
命令行参数是调用某个程序时除程序名以外的其他参数。命令行参数使程序员可以在启动一个程序时对程序行为作出选择。
b) 写一个程序, 打印出所有的命令行参数.
import sys

# Show every command-line argument (the script name included) as one list.
arguments = sys.argv
print(str(arguments))
$ calc.py 1 + 2
3
$ calc.py 3 ^ 3
27
$ calc.py print
1 + 2
3
3 ^ 3
27
$ calc.py print
$
- import sys,os
# calc.py: "calc.py A OP B" evaluates and logs an expression;
# "calc.py print" shows the log and then clears it.
import operator

HISTORY = r'test.txt'
OPERATIONS = {
    '+': operator.add,
    '-': operator.sub,
    '*': operator.mul,
    '/': operator.truediv,
    '%': operator.mod,
    '**': operator.pow,
    '^': operator.pow,  # the spelling used in the exercise's examples
}

if len(sys.argv) == 2 and sys.argv[1] == 'print':
    if os.path.exists(HISTORY):
        with open(HISTORY, 'r') as f:
            for line in f:
                # Each line already ends with a newline; avoid doubling it.
                print(line.rstrip('\n'))
    else:
        print('no file yet')
    # Clear the log so that a second "print" shows nothing.
    open(HISTORY, 'w').close()
elif len(sys.argv) == 4 and sys.argv[2] in OPERATIONS:
    a, operation, b = sys.argv[1:4]
    try:
        result = OPERATIONS[operation](float(a), float(b))
    except (ValueError, ZeroDivisionError, OverflowError) as err:
        print('error: %s' % err)
    else:
        expression = '%s %s %s' % (a, operation, b)
        print(expression)
        print(result)
        with open(HISTORY, 'a') as f:
            # '\n' is the right terminator for a text-mode file.
            f.write('%s\n%s\n\n' % (expression, result))
else:
    print('usage: calc.py NUMBER OPERATOR NUMBER | calc.py print')
- import os
# Append the content of the first file to the end of the second one.
file1 = raw_input('input first file name:')
file2 = raw_input('input second file name:')
with open(file1, 'r') as f1:
    with open(file2, 'a') as f2:
        # Start on a fresh line. '\n' is translated by text mode, whereas
        # os.linesep would become '\r\r\n' on Windows.
        f2.write('\n')
        for line in f1:
            f2.write(line)
人们输入的文字常常超过屏幕的最大宽度. 编写一个程序, 在一个文本文件中查找长度大于 80 个字符的文本行. 从最接近 80 个字符的单词断行, 把剩余文本插入到下一行处. 程序执行完毕后, 应该没有超过 80 个字符的文本行了.
- import os
import textwrap


def wrap_lines(lines, width=80):
    """Return *lines* re-broken so that none is longer than *width*.

    Line terminators are stripped from the result. Lines that already fit
    are kept unchanged; longer ones are split at word boundaries.
    """
    wrapped = []
    for line in lines:
        text = line.rstrip('\r\n')
        if len(text) <= width:
            wrapped.append(text)
        else:
            # 'or ['']' keeps an over-long whitespace-only line as one
            # empty line instead of dropping it.
            wrapped.extend(textwrap.wrap(text, width) or [''])
    return wrapped


if __name__ == '__main__':
    # Re-wrap test.txt into test1.txt.
    with open(r'test.txt', 'r') as f:
        content = wrap_lines(f)
    with open(r'test1.txt', 'w') as f:
        f.writelines(item + '\n' for item in content)
创建一个原始的文本文件编辑器. 你的程序应该是菜单驱动的, 有如下这些选项:
1) 创建文件(提示输入文件名和任意行的文本输入),
2) 显示文件(把文件的内容显示到屏幕),
3) 编辑文件(提示输入要修改的行, 然后让用户进行修改),
4) 保存文件, 以及
5) 退出.
- import os
def create(filename):
    """Create *filename* from lines typed by the user.

    Input ends when the user enters a single 'e'. An existing file of the
    same name is overwritten.
    """
    content = []
    while True:
        line = raw_input('pls input a line,quit as e:')
        if line == 'e':
            break
        content.append(line)
    with open(filename, 'w') as f:
        # '\n' is the right terminator for a text-mode file.
        f.writelines(line + '\n' for line in content)
def show(filename):
    """Print the content of *filename*, or 'no file yet' if it is missing."""
    if not os.path.exists(filename):
        print('no file yet')
        return
    with open(filename) as f:
        for line in f:
            # print() adds the newline that is stripped here.
            print(line.rstrip('\n'))
def edit(filename, index, content):
    """Return the lines of *filename* with line *index* replaced by *content*.

    The file itself is not modified; pass the result to save(). A missing
    trailing newline is added to *content* so the edited line does not
    merge with the next one. Raises IndexError if *index* is out of range.
    """
    with open(filename) as f:
        ls = f.readlines()
    if not content.endswith('\n'):
        content += '\n'
    ls[index] = content
    return ls
def save(filename, ls):
    """Overwrite *filename* with the lines in *ls*, written verbatim.

    The file is closed even if a write fails.
    """
    with open(filename, 'w') as f:
        f.writelines(ls)
def main():
    """Run the menu loop of the text editor until the user picks '5'.

    ``filename`` remembers the file last created or shown, and ``ls``
    holds the edited but not yet saved lines.
    """
    filename = ''
    ls = []
    while True:
        print('\n')
        print('1. create a file')
        print('2. show a file')
        print('3. edit a file')
        print('4. save a file')
        print('5. quit')
        # Compare whole answers: the old substring test ("ch not in '1234'")
        # let '' and '12' through without doing anything.
        ch = raw_input('input a choice:').strip()
        if ch == '5':
            break
        elif ch == '1':
            filename = raw_input('input a file name:')
            create(filename)
        elif ch == '2':
            filename = raw_input('input a file name:')
            show(filename)
        elif ch == '3':
            if filename == '':
                filename = raw_input('file name: ')
            try:
                index = int(raw_input('input a index of line:'))
                content = raw_input('pls input a line:')
                ls = edit(filename, index, content)
            except (ValueError, IndexError, IOError) as err:
                print('edit failed: %s' % err)
        elif ch == '4':
            # Saving with nothing edited used to truncate the file.
            if filename and ls:
                save(filename, ls)
            else:
                print('nothing to save')
        else:
            print('invalid choice')


if __name__ == '__main__':
    main()
# Count how often one byte value occurs in a file.
value = int(raw_input('input a value between 0 and 255:'))
filename = raw_input('input a fielname:')
if not 0 <= value <= 255:
    print('value must be between 0 and 255')
else:
    # A one-byte string on both Python 2 and Python 3.
    ch = bytes(bytearray([value]))
    # Binary mode: text mode may translate line endings and therefore
    # change the counts.
    with open(filename, 'rb') as f:
        print(sum(iterm.count(ch) for iterm in f))
创建前一个问题的辅助程序. 创建一个随机字节的二进制数据文件, 但某一特定字节会在文件中出现指定的次数. 该程序接受三个参数:
1) 一个字节值( 0 - 255 ),
2) 该字符在数据文件中出现的次数, 以及
3) 数据文件的总字节长度.
你的工作就是生成这个文件, 把给定的字节随机散布在文件里, 并且要求保证给定字符在文件中只出现指定的次数, 文件应精确地达到要求的长度.
- import random
def createfile(value, count, len):
    """Write test.txt with *len* random bytes, exactly *count* equal to *value*.

    The occurrences of *value* are spread uniformly; every other byte is
    drawn uniformly from the 255 remaining values.

    Raises ValueError if *value* is outside 0..255 or *count* is outside
    0..*len*.
    """
    # The parameter is named 'len' (kept for callers), so the builtin of
    # that name is not available inside this function.
    if not 0 <= value <= 255:
        raise ValueError('value must be in 0..255')
    if not 0 <= count <= len:
        raise ValueError('count must be in 0..len')
    fillers = [b for b in range(256) if b != value]
    data = [random.choice(fillers) for _ in range(len - count)]
    data.extend([value] * count)
    random.shuffle(data)
    with open(r'test.txt', 'wb') as f:
        # bytearray yields real bytes on both Python 2 and Python 3.
        f.write(bytearray(data))
# Generate a sample file and show its content and size.
createfile(97, 3, 50)
with open(r'test.txt', 'rb') as f:
    data = f.read()
# repr() keeps arbitrary bytes from garbling the terminal.
print(repr(data))
# Measure the whole payload: random data may contain newline bytes, so the
# first "line" is not the whole file.
print(len(data))
写一小段代码, 压缩/解压缩 gzip 或 bzip 格式的文件. 可以使用命令行下的 gzip 或 bzip2 以及 GUI 程序 PowerArchiver , StuffIt , 或 WinZip 来确认你的 Python支持这两个库.
- import gzip
# Compress test.txt to test.txt.gz, then decompress it into test1.txt.
import shutil

# compress
with open(r'test.txt', 'rb') as f_in:
    with gzip.open(r'test.txt.gz', 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

# decompress (copied in chunks, so the payload is never held in memory at once)
with gzip.open(r'test.txt.gz', 'rb') as f:
    with open(r'test1.txt', 'wb') as f_out:
        shutil.copyfileobj(f, f_out)
创建一个程序, 可以往 ZIP 归档文件加入文件, 或从中提取文件,有可能的话, 加入创建ZIP 归档文件的功能.
- import zipfile
def create_zipfile(zipname, filename1, filename2):
    """Create (or overwrite) the ZIP archive *zipname* holding the two files.

    The archive is closed even if adding a file fails.
    """
    with zipfile.ZipFile(zipname, 'w') as z:
        z.write(filename1)
        z.write(filename2)
def add_zipfile(zipname, filename):
    """Append *filename* to the ZIP archive *zipname* (created if missing).

    The archive is closed even if adding the file fails.
    """
    with zipfile.ZipFile(zipname, 'a') as z:
        z.write(filename)
def extract_zipfile(zipname, filename):
    """Extract the member *filename* of *zipname* into the current directory.

    Raises KeyError if the archive has no such member.
    """
    with zipfile.ZipFile(zipname, 'r') as z:
        z.extract(filename)


if __name__ == '__main__':
    create_zipfile(r'test.zip', r'test.txt', r'test1.txt')
    add_zipfile(r'test.zip', r'test2.txt')
    extract_zipfile(r'test.zip', r'test.txt')
unzip -l 命令显示出的 ZIP 归档文件很无趣. 创建一个 Python脚本 lszip.py , 使它可以显示额外信息: 压缩文件大小, 每个文件的压缩比率(通过比较压缩前后文件大小), 以及完成的 time.ctime() 时间戳, 而不是只有日期和 HH:MM .
提示: 归档文件的 date_time 属性并不完整, 无法提供给 time.mktime() 使用....这由你自己决定.
- import zipfile,os,time
# lszip: list a ZIP archive with sizes, compression rate and full timestamps.
filename = raw_input('zip file name:')
print('zip file size: %d bytes' % (os.stat(filename).st_size))
with zipfile.ZipFile(filename, 'r') as z:
    print('filename\tdatetime\tsize\tcompress size\trate')
    for info in z.infolist():
        # date_time holds only (Y, M, D, h, m, s). Pad weekday and yearday
        # with 0 and pass isdst=-1 so that mktime() works out daylight
        # saving itself; a fixed 0 shifts summer timestamps by an hour.
        t = time.ctime(time.mktime(tuple(info.date_time) + (0, 0, -1)))
        if info.file_size:
            rate = float(info.compress_size) / info.file_size * 100
        else:
            # Empty files and directory entries: avoid dividing by zero.
            rate = 0.0
        print('%s\t%s\t%d\t%d\t%.2f%%' % (info.filename, t, info.file_size,
                                          info.compress_size, rate))
为 TAR 归档文件建立类似上个问题的程序. 这两种文件的不同之处在于 ZIP 文件通常是压缩的, 而 TAR 文件不是, 只是在 gzip 和 bzip2 的支持下才能完成压缩工作. 加入任意一种压缩格式支持.附加题: 同时支持 gzip 和 bzip2 .
- import tarfile
def create_tarfile(tarname, filename1, filename2):
    """Create the compressed TAR archive *tarname* holding the two files.

    Names ending in .tar.bz2, .tbz or .tbz2 are bzip2-compressed. Every
    other name is gzip-compressed, as before.
    """
    bz2_suffixes = ('.tar.bz2', '.tbz', '.tbz2')
    mode = 'w:bz2' if tarname.endswith(bz2_suffixes) else 'w:gz'
    with tarfile.open(tarname, mode) as t:
        t.add(filename1)
        t.add(filename2)
def extract_tarfile(tarname, dest=r'D:\test'):
    """Extract every member of *tarname* into the directory *dest*.

    The compression (none, gzip or bzip2) is detected automatically.
    *dest* defaults to the location this script always used.
    """
    with tarfile.open(tarname, 'r') as t:
        if hasattr(tarfile, 'data_filter'):
            # Newer Pythons can refuse members that would escape *dest*
            # (absolute paths, '..', unsafe links).
            t.extractall(dest, filter='data')
        else:
            # NOTE(review): without filter support, only extract archives
            # from a trusted source.
            t.extractall(dest)


if __name__ == '__main__':
    create_tarfile(r'test.tar.gz', r'test.txt', r'test1.txt')
    extract_tarfile(r'test.tar.gz')
参考前两个问题的解决方案, 写一个程序, 在 ZIP (.zip) 和TAR/gzip (.tgz/.tar.gz) 或 TAR/bzip2 (.tbz/.tar.bz2) 归档文件间移动文件. 文件可能是已经存在的, 必要时请创建文件.
- import zipfile,tarfile,os
def movefile(src, dst, filename):
    """Copy the member *filename* from archive *src* into archive *dst*.

    Supported directions are ZIP -> compressed TAR and compressed TAR ->
    ZIP, chosen by file extension; other combinations do nothing. As
    before, the member stays in *src*, and a member that *src* lacks is
    created empty (and, for a ZIP source, also added to *src*). If *dst*
    already holds a member of that name it is left untouched. A missing
    *dst* is created.

    Everything happens in memory or inside the archives, so no scratch
    files are created in or deleted from the current directory.
    """
    import io
    import time

    tar_suffixes = ('.tar.gz', '.tgz', '.tbz', '.tar.bz2')

    if src.endswith('.zip') and dst.endswith(tar_suffixes):
        with zipfile.ZipFile(src, 'a') as z:
            if filename in z.namelist():
                data = z.read(filename)
            else:
                data = b''
                z.writestr(filename, data)

        existing = []
        if os.path.exists(dst):
            with tarfile.open(dst, 'r') as old:
                existing = old.getnames()
        if filename in existing:
            return

        # A compressed tar cannot be appended to: rebuild it next to the
        # original and swap the two once the new archive is complete.
        mode = 'w:gz' if dst.endswith(('.tar.gz', '.tgz')) else 'w:bz2'
        rebuilt = dst + '.tmp'
        with tarfile.open(rebuilt, mode) as new:
            if os.path.exists(dst):
                with tarfile.open(dst, 'r') as old:
                    for member in old.getmembers():
                        payload = old.extractfile(member) if member.isreg() else None
                        new.addfile(member, payload)
            info = tarfile.TarInfo(filename)
            info.size = len(data)
            info.mtime = int(time.time())
            new.addfile(info, io.BytesIO(data))
        if os.path.exists(dst):
            os.remove(dst)  # os.rename cannot overwrite on Windows
        os.rename(rebuilt, dst)

    elif src.endswith(tar_suffixes) and dst.endswith('.zip'):
        data = b''
        with tarfile.open(src, 'r') as t:
            if filename in t.getnames():
                payload = t.extractfile(filename)
                if payload is not None:
                    data = payload.read()
        with zipfile.ZipFile(dst, 'a') as z:
            if filename not in z.namelist():
                z.writestr(filename, data)


if __name__ == '__main__':
    movefile(r'test.zip', r'test.tar.gz', r'test2.txt')
    movefile(r'test.tar.gz', r'test.zip', r'test2.txt')
创建一个程序, 接受任意数目的归档文件以及一个目标目录做为参数.归档文件格式可以是 .zip, .tgz, .tar.gz, .gz, .bz2, .tar.bz2, .tbz 中的一种或几种. 程序会把第一个归档文件解压后放入目标目录, 把其它归档文件解压后放入以对应文件名命名的目录下(不包括扩展名). 例如输入的文件名为 header.txt.gz 和 data.tgz , 目录为 incoming ,header.txt 会被解压到 incoming 而 data.tgz 中的文件会被放入 incoming/data .
- import zipfile,tarfile,gzip,bz2,os
_TAR_SUFFIXES = ('.tar.gz', '.tgz', '.tbz', '.tar.bz2')
# Order matters: the compound tar suffixes must be tried before '.gz'/'.bz2'.
_ARCHIVE_SUFFIXES = _TAR_SUFFIXES + ('.zip', '.gz', '.bz2')


def _archive_stem(name):
    """Return *name* without its archive suffix, or None if not an archive."""
    for suffix in _ARCHIVE_SUFFIXES:
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return None


def _extract_archive(path, target):
    """Unpack the archive at *path* into the existing directory *target*."""
    import shutil

    name = os.path.basename(path)
    if name.endswith(_TAR_SUFFIXES):
        with tarfile.open(path, 'r') as t:
            if hasattr(tarfile, 'data_filter'):
                # Refuse members that would land outside *target*.
                t.extractall(target, filter='data')
            else:
                t.extractall(target)
    elif name.endswith('.zip'):
        with zipfile.ZipFile(path, 'r') as z:
            z.extractall(target)
    else:
        # Plain .gz/.bz2: one compressed file, restored under its own name
        # minus the suffix. Both sides are binary.
        opener = gzip.open if name.endswith('.gz') else bz2.BZ2File
        with opener(path, 'rb') as packed:
            with open(os.path.join(target, _archive_stem(name)), 'wb') as plain:
                shutil.copyfileobj(packed, plain)


def depressfile(src, dst):
    """Unpack every archive found in directory *src* below directory *dst*.

    Archives are handled in sorted name order. The first one is unpacked
    straight into *dst*; each later one goes into ``dst/<archive name
    without its extension>`` (a number is appended if that name is taken).
    Files that are not .zip/.tgz/.tar.gz/.gz/.bz2/.tar.bz2/.tbz archives
    are ignored. *dst* is created if missing. When *src* is not a
    directory a message is printed and nothing is done.
    """
    if not os.path.isdir(src):
        print('%s is not a directory,input again' % (src))
        return
    archives = [name for name in sorted(os.listdir(src)) if _archive_stem(name)]
    if not archives:
        return
    if not os.path.isdir(dst):
        os.makedirs(dst)

    _extract_archive(os.path.join(src, archives[0]), dst)
    for position, name in enumerate(archives[1:]):
        base = os.path.join(dst, _archive_stem(name))
        dirname = base
        counter = position
        while os.path.exists(dirname):
            dirname = base + str(counter)
            counter += 1
        os.mkdir(dirname)
        _extract_archive(os.path.join(src, name), dirname)


if __name__ == '__main__':
    depressfile(r'D:\1', r'D:\2')