方法一:使用codecs
import codecs
f = codecs.open('nlpir/Readme.txt','r','GBK') line = f.readline() while line: print line, line = f.readline() f.close()
上面的方法很慢,可以直接读取整个文件
# Read and decode the whole GBK file in a single call. The with-block
# closes the handle explicitly instead of leaving it to garbage collection.
with codecs.open('nlpir/Readme.txt', 'r', 'GBK') as f:
    content = f.read()
也可以用 readlines() 一次读取所有行
方法二:
#读取文档
def read_file(): path1='K:\\SogouC.reduced\\Reduced\\C000008\\10.txt' f = open(path1,'r+') file_list = f.read().decode("gbk") print file_list f.close()
#逐行读取文档
def read_file_line(): path1='K:\\SogouC.reduced\\Reduced\\C000008\\10.txt' f = open(path1,'r+') line = f.readline() while line: print line.decode('gbk') line = f.readline() f.close()
读取带BOM头的文件
def readWeibo(): f = codecs.open(u'H:/数据/weibo/weibo.txt','r','utf-8') line = f.readline() if line[:3] == codecs.BOM_UTF8: line = line[3:]#去除bom头 i=0 while line: print line, line = f.readline() if i>10: break f.close() return readWeibo()
显示行数的读取def test_read(): file_path = u"D:/dev_data/idf_data/weibo/weibo_text.txt" f = open(file_path, "r") for i,line in enumerate(f): print i,line, if i>10: break; f.close()