读文件:
定义一个简单的文本处理函数:
def process(text):
print "PROCESS:", text
按字节读取:
def process(text):
print "PROCESS:", text
# Read the file in chunks of at most 10 bytes and hand each chunk to process().
with open("tmp.txt") as f:
    while True:
        text = f.read(10)
        # read() returns an empty string only at end of file.
        if not text:
            break
        process(text)
按行读取:
# Read the file one line at a time with readline().
with open("tmp.txt") as f:
    while True:
        text = f.readline()
        # A blank line still contains "\n"; only end of file yields "".
        if not text:
            break
        process(text)
读取所有内容,返回整个字符串文本:
# Read the whole file into one string.
# No explicit f.close() is needed: the with statement closes the file on exit.
with open("tmp.txt") as f:
    text = f.read()
    process(text)
读取所有内容,返回一个列表,列表的每个元素就是一行内容:
# readlines() loads every line into a list before the loop starts.
with open("tmp.txt") as f:
    for line in f.readlines():
        process(line)
fileinput懒惰行迭代:
当要读取的文件非常大时,readlines会一次性把所有行读入内存,占用过多内存;fileinput按行惰性迭代,更节省内存。
import fileinput

# fileinput.input() iterates over the lines of the named file one at a time.
for line in fileinput.input("tmp.txt"):
    process(line)
文件迭代器(最佳实践):
# A file object is its own line iterator, so it can be looped over directly.
with open("tmp.txt") as f:
    for line in f:
        process(line)
write:
# Write user input to tmp.txt until an empty line or end-of-input is seen.
try:
    with open("tmp.txt", "w") as f:
        while True:
            text = raw_input("Please Input: ")
            # An empty line ends the session.
            if not text:
                break
            # NOTE: raw_input() strips the trailing newline and write() does
            # not add one, so successive inputs are written back-to-back.
            f.write(text)
except EOFError:
    # raw_input() raises EOFError at end of input (e.g. Ctrl-D); treat it as
    # a normal way to finish. The with statement has already closed the file.
    pass
writelines:
tmp_line = ["hello world", "hi python", "good bye"]
with open("tmp.txt", "w") as f:
    # writelines() does not add line separators, so the strings are written
    # back-to-back.
    f.writelines(tmp_line)
读写综合(文件拷贝示例cp.py):
from sys import argv
from sys import exit
if len(argv) != 3:
exit(1)
try:
with open(argv[1], "rb") as from_file, open(argv[2], "wb") as to_file:
while True:
tmp = from_file.read(1024)
if not tmp:
break
to_file.write(tmp)
except IOError as err:
print "File Error:", str(err)
序列化:
import cPickle as p
with open("tmp.dat", "w") as f:
p.dump(["apple", "mango", "orange"], f)
with open("tmp.dat") as f:
print p.load(f)
随机访问:
# -*- coding: utf-8 -*-
# seek(offset [, whence]) 把当前位置移动到由offset定义的位置。
# whence的值为0时(默认),表示相对位置是文件开头处;
# whence的值为1时,表示相对位置是当前位置;
# whence的值为2时,表示相对位置是文件结尾处。
# tell方法返回当前文件的位置。
with open("tmp.txt") as f:
print f.read(10)
print "pos:", f.tell()
f.seek(0)
print "pos:", f.tell()
print f.read()
print "pos:", f.tell()
os.path模块:
遍历目录:
# -*- coding: utf-8 -*-
import sys
import os
def formate_filename(filename, deep=0):
    """Return the base name of ``filename`` indented for display.

    Args:
        filename: Path whose final component is to be shown.
        deep: Nesting depth; one space is prepended per level. Values of
            zero or less add no indentation.

    Returns:
        The indentation followed by ``os.path.basename(filename)``.
    """
    # String repetition replaces the manual counter loop.
    return ' ' * deep + os.path.basename(filename)
def list_dir(dirname, deep = 0):
if not os.path.exists(dirname):
print dirname, 'is not existed'
sys.exit(1)
if os.path.isfile(dirname):
print formate_filename(dirname, deep)
if os.path.isdir(dirname):
print formate_filename(dirname, deep) + ":"
# 列出目录的所有文件和子目录
filenames = os.listdir(dirname)
for filename in filenames:
list_dir(dirname + os.sep + filename, deep + 1)
# At least one path argument is required.
if len(sys.argv) < 2:
    sys.exit(1)

# Drop the script name so that only the path arguments remain.
del sys.argv[0]
for dirname in sys.argv:
    list_dir(dirname)
    # Blank line after each top-level listing.
    print