Python–cookbook–6.数据编码与处理
导入对应模块
import csv
from collections import namedtuple, OrderedDict
import json
from urllib.request import urlopen
import xml.etree.ElementTree as ET
from lxml import etree
读取csv
# Example (disabled): read each CSV row as a list, then wrap it in a named tuple
# with open('stock.csv') as f:
#     f_csv = csv.reader(f)
#     headings = next(f_csv)  # first row holds the column headers
#     Row = namedtuple('Row', headings)
#     for r in f_csv:
#         # print(r)  # r is the list of fields for one row
#         row = Row(*r)  # convert the row into a named tuple
#         print(row)
# Example (disabled): read each CSV row as a dictionary
# with open('stock.csv') as f:
#     f_csv = csv.DictReader(f)
#     for row in f_csv:
#         print(row)  # each row arrives as a dict
写入csv
# Sample data for writing lists/tuples (and dicts) to CSV.
headers = ['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']

# Each record as a plain tuple, one field per header column.
rows = [
    ('AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800),
    ('AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500),
    ('AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000),
]

# The same records as dictionaries keyed by header name.
row_dic = [dict(zip(headers, fields)) for fields in rows]

# Example (disabled): write sequences with csv.writer
# with open('stocks_write.csv', 'w', newline='') as f:  # newline='' avoids blank lines
#     f_csv = csv.writer(f)  # writes rows directly
#     f_csv.writerow(headers)
#     f_csv.writerows(rows)
# Example (disabled): write dictionaries with csv.DictWriter
# with open('stocks_write.csv', 'w', newline='') as f:
#     f_csv = csv.DictWriter(f, headers)  # writes dicts
#     f_csv.writeheader()
#     f_csv.writerows(row_dic)
读写JSON数据
# Sample record used by the JSON examples.
data = dict(name='ACME', shares=100, price=542.23)

# The two main functions for encoding and decoding JSON are
# json.dumps() (object -> str) and json.loads() (str -> object).
json_str = json.dumps(data)
data_copy = json.loads(json_str)
# print(json_str, type(json_str))
# print(data_copy, type(data_copy))
处理json文件,使用json.dump()和json.load()
# Example (disabled): write JSON to a file and read it back
# with open('data.json', 'w') as f:
#     json.dump(data, f)
# with open('data.json', 'r') as f:
#     data_new = json.load(f)
#     print(data_new)

# JSON syntax is almost identical to Python literal syntax, apart from a
# few small differences: True maps to true, False maps to false, and
# None maps to null.
d = dict(a=True, b='Hello', c=None)
print(json.dumps(d))

# Decoding into other object types is done with the
# object_pairs_hook / object_hook arguments:
# data_ord = json.loads(json_str, object_pairs_hook=OrderedDict)
# print(data)
解析简单的XML数据
# Example (disabled): download an RSS feed and print each item's fields
# u = urlopen('http://planet.python.org/rss20.xml')
# doc = ET.parse(u)
# for item in doc.iterfind('channel/item'):
#     title = item.findtext('title')
#     date = item.findtext('pubDate')
#     link = item.findtext('link')
#     print(title)
#     print(date)
#     print(link)
#     print()

# Parse a local XML file and print the text of the root's <note> child.
# tree = ET.parse('hopt.xml')  # original note: ET raised an error here
tree = etree.parse('hopt.xml')
root = tree.getroot()
# lxml.etree elements do not expose child elements as attributes (that is
# lxml.objectify behaviour), so `root.note` raises AttributeError. Look the
# child up explicitly; findtext() returns None when there is no <note> child.
print(root.findtext('note'))