首先,在 Python 中,open() 返回的文件对象本身就是一个迭代器,因此可以直接用 for ... in 逐行遍历文件内容。
<class '_io.TextIOWrapper'>
注意:我这里使用的 txt 文档中,每一行存放的都是一条 JSON 格式的记录。
这里可以通过如下代码查看:
import json
# The ABCs live in collections.abc; the old aliases in `collections`
# were removed in Python 3.10, so the original import fails there.
from collections.abc import Iterator

path = r"E:\tf_project\利用python进行数据分析--数据\ch02\usagov_bitly_data2012-03-16-1331923249.txt"

# The context manager closes the file once the demonstration is done.
with open(path) as f:
    # A file object can be consumed directly by a for-clause because it is
    # an iterator; each step yields one line of the file.
    result = [line for line in f]
    print(type(f))
    print(isinstance(f, Iterator))
输出结果如下:
<class '_io.TextIOWrapper'>
True
也可以通过逐行读取的方式实现。
# Read only the first line of the file and decode it as JSON.
# `path` and `json` come from the preceding snippet.
with open(path) as f:
    line = f.readline()  # readline() returns the line as a str

result = json.loads(line)  # decode the JSON text into a dict
result
{'a': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',
'al': 'en-US,en;q=0.8',
'c': 'US',
'cy': 'Danvers',
'g': 'A6qOVH',
'gr': 'MA',
'h': 'wfLQtf',
'hc': 1331822918,
'hh': '1.usa.gov',
'l': 'orofrog',
'll': [42.576698, -70.954903],
'nk': 1,
'r': 'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',
't': 1331923247,
'tz': 'America/New_York',
'u': 'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}
import json

path = r"E:\tf_project\利用python进行数据分析--数据\ch02\usagov_bitly_data2012-03-16-1331923249.txt"

# Decode every line of the file into its own record. The `with` block
# guarantees the file is closed, which a bare open() inside the
# comprehension does not.
with open(path) as f:
    records = [json.loads(line) for line in f]

records[0]
{'a': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',
'al': 'en-US,en;q=0.8',
'c': 'US',
'cy': 'Danvers',
'g': 'A6qOVH',
'gr': 'MA',
'h': 'wfLQtf',
'hc': 1331822918,
'hh': '1.usa.gov',
'l': 'orofrog',
'll': [42.576698, -70.954903],
'nk': 1,
'r': 'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',
't': 1331923247,
'tz': 'America/New_York',
'u': 'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}
f.readlines() 会一次性读取整个文件,并返回一个列表,列表中的每个元素是文件的一行(字符串,包含行尾的换行符)。
import json

# readlines() loads the whole file at once and returns a list of str,
# one element per line. `path` comes from an earlier snippet.
with open(path) as f:
    result = f.readlines()

print(result)
# Iterate over the lines directly instead of indexing via range(len(...)).
for line in result:
    print(line)
['{ "a": "Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/535.11 (KHTML, like Gecko) Chrome\\/17.0.963.78 Safari\\/535.11", "c": "US", "nk": 1, "tz": "America\\/New_York", "gr": "MA", "g": "A6qOVH", "h": "wfLQtf", "l": "orofrog", "al": "en-US,en;q=0.8", "hh": "1.usa.gov", "r": "http:\\/\\/www.facebook.com\\/l\\/7AQEFzjSi\\/1.usa.gov\\/wfLQtf", "u": "http:\\/\\/www.ncbi.nlm.nih.gov\\/pubmed\\/22415991", "t": 1331923247, "hc": 1331822918, "cy": "Danvers", "ll": [ 42.576698, -70.954903 ] }\n']
{ "a": "Mozilla\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\/535.11 (KHTML, like Gecko) Chrome\/17.0.963.78 Safari\/535.11", "c": "US", "nk": 1, "tz": "America\/New_York", "gr": "MA", "g": "A6qOVH", "h": "wfLQtf", "l": "orofrog", "al": "en-US,en;q=0.8", "hh": "1.usa.gov", "r": "http:\/\/www.facebook.com\/l\/7AQEFzjSi\/1.usa.gov\/wfLQtf", "u": "http:\/\/www.ncbi.nlm.nih.gov\/pubmed\/22415991", "t": 1331923247, "hc": 1331822918, "cy": "Danvers", "ll": [ 42.576698, -70.954903 ] }
{ "a": "GoogleMaps\/RochesterNY", "c": "US", "nk": 0, "tz": "America\/Denver", "gr": "UT", "g": "mwszkS", "h": "mwszkS", "l": "bitly", "hh": "j.mp", "r": "http:\/\/www.AwareMap.com\/", "u": "http:\/\/www.monroecounty.gov\/etc\/911\/rss.php", "t": 1331923249, "hc": 1308262393, "cy": "Provo", "ll": [ 40.218102, -111.613297 ] }
{ "a": "Mozilla\/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident\/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.3)", "c": "US", "nk": 1, "tz": "America\/New_York", "gr": "DC", "g": "xxr3Qb", "h": "xxr3Qb", "l": "bitly", "al": "en-US", "hh": "1.usa.gov", "r": "http:\/\/t.co\/03elZC4Q", "u": "http:\/\/boxer.senate.gov\/en\/press\/releases\/031612.cfm", "t": 1331923250, "hc": 1331919941, "cy": "Washington", "ll": [ 38.900700, -77.043098 ] }
{ "a": "Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit\/534.52.7 (KHTML, like Gecko) Version\/5.1.2 Safari\/534.52.7", "c": "BR", "nk": 0, "tz": "America\/Sao_Paulo", "gr": "27", "g": "zCaLwp", "h": "zUtuOu", "l": "alelex88", "al": "pt-br", "hh": "1.usa.gov", "r": "direct", "u": "http:\/\/apod.nasa.gov\/apod\/ap120312.html", "t": 1331923249, "hc": 1331923068, "cy": "Braz", "ll": [ -23.549999, -46.616699 ] }