python数据处理-新书-第三章-读取数据

最新推荐文章于 2022-11-13 16:00:04 发布

自渡2021

最新推荐文章于 2022-11-13 16:00:04 发布

阅读量921

点赞数 1

分类专栏： python数据处理

本文链接：https://blog.csdn.net/qq_34318539/article/details/90698514

版权

python数据处理专栏收录该内容

6 篇文章 1 订阅

订阅专栏

3.1csv数据

https://github.com/jackiekazil/data-wrangling

下载本书资料

这个是第三章的资料

注意：模块名必须小写。写成 import CSV 会报错 ModuleNotFoundError: No module named 'CSV'，正确写法是 import csv。

import csv

# Read the CSV file row by row; each row is printed as a list of strings.
# The file must be opened in text mode ('r', not 'rb'): in Python 3,
# csv.reader expects an iterator that yields str, not bytes.
# newline='' is the setting recommended by the csv module documentation, and
# the with-statement guarantees the file is closed when the loop finishes.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        print(row)

_csv.Error: iterator should return strings, not bytes (did you open the file in text mode?)

csvfile = open('data-text.csv','r')#是r而不是rb，这是书上的一个小错误。

2. 使用 csv.DictReader：字典的键来自 csv 文件的第一行（表头），其后每一行的各个字段作为对应键的值。

import csv

# Read the CSV file with DictReader: each row is printed as a mapping whose
# keys are taken from the first line of the file.
# The with-statement closes the file once all rows have been consumed;
# newline='' is the setting recommended by the csv module documentation.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row)

3.2JSON数据

书中的 print item 在 Python 3 中会报错：SyntaxError: Missing parentheses in call to 'print'. Did you mean print(item)? 应改为 print(item)。

import json

# Read the whole JSON document as text; the with-statement closes the file
# handle as soon as the read is done instead of leaving it open.
with open('data-text.json') as json_file:
    json_data = json_file.read()

# Parse the text and print every top-level item of the decoded data.
data = json.loads(json_data)
for item in data:
    print(item)

3.3XML数据

数据形式，看的眼花缭乱的！！！

from xml.etree import ElementTree as ET

# Parse the XML file and print its outermost (root) element.
tree = ET.parse('data-text.xml')
root = tree.getroot()
# The closing parenthesis of this call was missing, which is a SyntaxError.
print('xml的最外层标签为', root)

print(root)打印xml的最外层标签为<Element 'GHO' at 0x033D0B70>

import xml.etree.ElementTree as ET

# Parse the XML file, then print the root element followed by the list of
# attribute and method names that dir() reports for it.
tree = ET.parse('data-text.xml')
root = tree.getroot()
for label, value in (
    ('xml的最外层标签为', root),
    ('root的所有方法和属性', dir(root)),
):
    print(label, value)

dir(root) 会列出 root 的所有方法和属性。

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
# Iterating an Element yields its child elements; list() collects them.
children = list(root)
print('查看根元素的子元素', children)

print('查看根元素的子元素',list(root))

修正一下，以便观察

下面获取Data元素的子元素，获取root下的Data元素的子元素

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
# Look up the 'Data' element under the root and collect its children.
data = root.find('Data')
data_children = list(data)
print('获取root下的Data元素的子元素', data_children)

输出超长的列表，以]结束，所以是列表。

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Walk two levels down: every child of 'Data', then every child of that
# child, printing each inner element object.
label = '获取root.Data.observation列表的子元素'
for observation in data:
    for item in observation:
        print(label, item)

得到Dim和Value对象。

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print the text content (.text) of every element nested two levels below
# the 'Data' element.
label = '获取root.Data.observation列表的子元素的文本内容'
for observation in data:
    for item in observation:
        print(label, item.text)

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# For every element nested two levels below 'Data', print the list of its
# own child elements.
label = '获取root.Data.observation列表的子元素的列表内容'
for observation in data:
    for item in observation:
        item_children = list(item)
        print(label, item_children)

如果节点的标签之间没有值，那么在标签内通常会有属性。

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print the attribute dictionary (.attrib) of every element nested two
# levels below 'Data'.
label = '获取root.Data.observation列表的子元素的节点属性'
for observation in data:
    for item in observation:
        print(label, item.attrib)

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print only the keys of each nested element's attribute dictionary.
label = '获取root.Data.observation列表的子元素的节点字典的键'
for observation in data:
    for item in observation:
        attrib_keys = item.attrib.keys()
        print(label, attrib_keys)

from xml.etree import ElementTree as ET
# Intermediate step: try to take the first attribute name of every element
# nested two levels below 'Data'.
# NOTE: this version is kept as-is to show the failure; on Python 3 the
# indexing below raises TypeError.
tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
# all_data and record are created here but not used yet in this step.
all_data = []
for observation in data:
	record = {}
	for item in observation:
		# In Python 3, keys() returns a dict_keys view, which can be iterated
		# but not indexed, so the [0] here is what triggers the TypeError.
		lookup_key = (item.attrib.keys())[0]
		print('获取root.Data.observation列表的子元素的节点字典的键 的键',lookup_key)

会产生错误：TypeError: 'dict_keys' object does not support indexing

这是由于 Python 3 改变了 dict.keys() 的行为：它返回的是 dict_keys 对象，该对象可迭代（iterable），但不支持下标索引。我们可以将其显式地转换成 list：

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

label = '获取root.Data.observation列表的子元素的节点字典的键 的键'
for observation in data:
    record = {}
    for item in observation:
        # keys() cannot be indexed directly in Python 3, so it is converted
        # to a list before taking the first attribute name.
        lookup_key = list(item.attrib.keys())[0]
        print(label, lookup_key)

有了键的名字，接下来找键对应的值。

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

label = '获取root.Data.observation列表的子元素的节点字典的键 的键 对应的值'
for observation in data:
    record = {}
    for item in observation:
        # Take the first attribute name, then look up the value stored
        # under that name in the same attribute dictionary.
        lookup_key = list(item.attrib.keys())[0]
        rec_key = item.attrib[lookup_key]
        print(label, rec_key)

from xml.etree import ElementTree as ET

# Intermediate step: handle elements whose first attribute is 'Numeric'.
# Any other element gets a None placeholder for both the key and the value.
tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            rec_key = None
            # Reset the value as well, so a value left over from an earlier
            # 'Numeric' element is never reported for this element.
            rec_value = None
        # The label announces both names, so print both values.
        print('rec_key,rec_value', rec_key, rec_value)

下面将None改成code对应的值

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

for observation in data:
    record = {}
    for item in observation:
        lookup_key = list(item.attrib.keys())[0]
        if lookup_key != 'Numeric':
            # Key is the value of the first attribute; value is the 'Code'
            # attribute of the same element.
            rec_key = item.attrib[lookup_key]
            rec_value = item.attrib['Code']
        else:
            # 'Numeric' elements are stored under the fixed key 'NUMERIC'.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']

        print('rec_key,rec_value', rec_key, rec_value)

创建字典

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Build one dictionary per child of 'Data' and collect them in all_data.
all_data = []
for observation in data:
    record = {}
    for item in observation:
        lookup_key = list(item.attrib.keys())[0]
        if lookup_key != 'Numeric':
            # Key is the value of the first attribute; value is the 'Code'
            # attribute of the same element.
            rec_key = item.attrib[lookup_key]
            rec_value = item.attrib['Code']
        else:
            # 'Numeric' elements are stored under the fixed key 'NUMERIC'.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        record[rec_key] = rec_value
    all_data.append(record)

print(all_data)