python数据处理-新书-第三章-读取数据

3.1csv数据

https://github.com/jackiekazil/data-wrangling

下载本书资料

 

这个是第三章的资料

ModuleNotFoundError: No module named 'CSV' —— 模块名区分大小写,不能写成大写的 CSV,应写成 import csv。

import csv

# Open in text mode ('r'), not binary ('rb'): in Python 3 csv.reader needs an
# iterator of str and raises _csv.Error when it is handed bytes.
# newline='' leaves line-ending handling to the csv module, as its
# documentation recommends; the with-block closes the file after the loop.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Each row is the list of field strings found on one line.
        print(row)

如果按书上的写法用二进制模式 'rb' 打开文件,Python 3 会报如下错误:
_csv.Error: iterator should return strings, not bytes (did you open the file in text mode?)

csvfile = open('data-text.csv','r')# use 'r', not 'rb' -- a small mistake in the book

2. 使用 csv.DictReader:字典的键来自csv文件第一行,后面每一行都生成一个字典,该行各字段就是对应键的值。

import csv

# The with-block closes the file once every row has been printed;
# newline='' is how the csv module's documentation says to open its input.
with open('data-text.csv', 'r', newline='') as csvfile:
    # DictReader takes the field names from the first line of the file and
    # yields one dict per following line.
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row)

3.2JSON数据

书上的 print item 是 Python 2 的写法,在 Python 3 中会报错:SyntaxError: Missing parentheses in call to 'print'. Did you mean print(item)? 应改为 print(item)。

import json

# Read the file inside a with-block so the handle is closed as soon as the
# text has been loaded, instead of being left open for the rest of the run.
with open('data-text.json') as json_file:
    json_data = json_file.read()

# Parse the JSON text into Python objects and print each top-level item.
data = json.loads(json_data)
for item in data:
    print(item)

3.3XML数据

数据形式,看的眼花缭乱的!!!

from xml.etree import ElementTree as ET

# Parse the XML file and fetch the outermost element of the document.
tree = ET.parse('data-text.xml')
root = tree.getroot()
# The print call needs its closing parenthesis; without it the script is a
# SyntaxError and never runs.
print('xml的最外层标签为', root)

print(root)打印xml的最外层标签为<Element 'GHO' at 0x033D0B70>

import xml.etree.ElementTree as ET

# Load the document and take its top-level element in one step.
root = ET.parse('data-text.xml').getroot()
print('xml的最外层标签为', root)

# dir() lists the attribute and method names available on the root element.
root_members = dir(root)
print('root的所有方法和属性', root_members)

dir(root)==root的所有方法和属性

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()

# Turning an element into a list yields its direct child elements.
root_children = list(root)
print('查看根元素的子元素', root_children)

print('查看根元素的子元素',list(root))

修正一下,以便观察

下面获取Data元素的子元素,获取root下的Data元素的子元素

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()

# Look up the 'Data' element under the root, then list its children.
data = root.find('Data')
data_children = list(data)
print('获取root下的Data元素的子元素', data_children)

输出超长的列表,以]结束,所以是列表。

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')

# Walk every child of Data, then every child of that child, printing each
# inner element object.
for obs in data:
    for child in obs:
        print('获取root.Data.observation列表的子元素', child)

得到Dim和Value对象。

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')

# Print the text content of every element nested two levels below Data.
for obs in data:
    for child in obs:
        child_text = child.text
        print('获取root.Data.observation列表的子元素的文本内容', child_text)

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')

# For every element inside each child of Data, print the list of that
# element's own children.
for obs in data:
    for child in obs:
        grandchildren = list(child)
        print('获取root.Data.observation列表的子元素的列表内容', grandchildren)

如果节点的标签之间没有值,那么在标签内通常会有属性。

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')

# attrib is the dictionary of XML attributes written on an element's tag.
for obs in data:
    for child in obs:
        attributes = child.attrib
        print('获取root.Data.observation列表的子元素的节点属性', attributes)

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')

# Print only the attribute names (the keys of the attrib dictionary).
for obs in data:
    for child in obs:
        attribute_names = child.attrib.keys()
        print('获取root.Data.observation列表的子元素的节点字典的键', attribute_names)

# Demonstration of the Python 2 idiom from the book; kept as-is on purpose.
from xml.etree import ElementTree as ET
tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
	record = {}
	for item in observation:
		# NOTE: indexing the result of keys() directly fails in Python 3, where
		# keys() returns a dict_keys view; this line raises TypeError there.
		lookup_key = (item.attrib.keys())[0]
		print('获取root.Data.observation列表的子元素的节点字典的键 的键',lookup_key)

会产生错误:TypeError: 'dict_keys' object does not support indexing

这是由于 Python 3 改变了 dict.keys() 的返回值:它返回的是 dict_keys 视图对象,可以迭代(iterable),但不支持用下标索引(not indexable)。我们可以将其显式地转换成 list:
 

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')
all_data = []
for obs in data:
    record = {}
    for child in obs:
        # keys() gives a view that cannot be indexed, so copy it into a list
        # before taking the first attribute name.
        lookup_key = list(child.attrib.keys())[0]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键', lookup_key)

有了键的名字,接下来找 键 对应的值。

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')
all_data = []
for obs in data:
    record = {}
    for child in obs:
        attributes = child.attrib
        # Name of the first attribute on the element ...
        lookup_key = list(attributes.keys())[0]
        # ... and the value stored under that name.
        rec_key = attributes[lookup_key]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键 对应的值', rec_key)

from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        # The first attribute name decides how this element is handled.
        lookup_key = list(item.attrib.keys())[0]
        if lookup_key == 'Numeric':
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            # Placeholders for now. rec_value is reset here too, so that it is
            # always defined and a value from an earlier 'Numeric' element
            # does not carry over.
            rec_key = None
            rec_value = None
        # Print both values so the output matches its label.
        print('rec_key,rec_value', rec_key, rec_value)

下面将None改成code对应的值

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')
all_data = []
for obs in data:
    record = {}
    for child in obs:
        attributes = child.attrib
        first_name = list(attributes.keys())[0]
        if first_name != 'Numeric':
            # Use the value of the first attribute as the key and the
            # element's 'Code' attribute as the value.
            rec_key = attributes[first_name]
            rec_value = attributes['Code']
        else:
            # Elements whose first attribute is 'Numeric' get a fixed key.
            rec_key = 'NUMERIC'
            rec_value = attributes['Numeric']

        print('rec_key,rec_value', rec_key, rec_value)

创建字典

import xml.etree.ElementTree as ET

root = ET.parse('data-text.xml').getroot()
data = root.find('Data')

# One dictionary per child of Data, collected in document order.
all_data = []
for obs in data:
    record = {}
    for child in obs:
        attributes = child.attrib
        first_name = list(attributes.keys())[0]
        if first_name != 'Numeric':
            # Key comes from the first attribute's value, value from 'Code'.
            rec_key = attributes[first_name]
            rec_value = attributes['Code']
        else:
            # Elements whose first attribute is 'Numeric' get a fixed key.
            rec_key = 'NUMERIC'
            rec_value = attributes['Numeric']
        record[rec_key] = rec_value
    all_data.append(record)

print(all_data)

不知道 对不对。。

 

 

 

 

 

 

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值