3.1csv数据
https://github.com/jackiekazil/data-wrangling
下载本书资料
这个是第三章的资料
注意:模块名要小写。写成 import CSV 会报错 ModuleNotFoundError: No module named 'CSV',正确写法是 import csv。
import csv

# Open in text mode ('r', not 'rb'): on Python 3 the csv module expects str
# rows, not bytes.  newline='' is what the csv docs recommend so the module
# can handle line endings inside quoted fields itself.  The with-block also
# guarantees the file is closed.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Each row is yielded as a list of column strings.
    for row in reader:
        print(row)
如果像书上那样用 'rb'(二进制模式)打开文件,Python 3 会报错:_csv.Error: iterator should return strings, not bytes (did you open the file in text mode?)
csvfile = open('data-text.csv','r')#是r而不是rb,这是书上的一个小错误。
2.使用 csv.DictReader:字典的键来自csv文件的第一行(表头),之后的每一行各生成一个字典,该行各列的内容就是对应键的值。
import csv

# Text mode plus newline='' as recommended by the csv docs; the with-block
# closes the file once the rows have been printed.
with open('data-text.csv', 'r', newline='') as csvfile:
    # DictReader takes its field names from the first row, so every later
    # row is yielded as a mapping keyed by those header names.
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row)
3.2JSON数据
书上的 print item 是 Python 2 的写法,在 Python 3 中会报错:SyntaxError: Missing parentheses in call to 'print'. Did you mean print(item)? 应改为 print(item)。
import json

# Read the file inside a context manager so the handle is closed instead of
# being left to the garbage collector.
with open('data-text.json') as json_file:
    json_data = json_file.read()

# Decode the JSON text into Python objects.
data = json.loads(json_data)

# Print each entry of the decoded top-level value.
for item in data:
    print(item)
3.3XML数据
数据形式,看的眼花缭乱的!!!
from xml.etree import ElementTree as ET

# Parse the XML document and fetch its root element.
tree = ET.parse('data-text.xml')
root = tree.getroot()

# The original call was missing its closing parenthesis (SyntaxError).
print('xml的最外层标签为', root)
print(root) 的输出为:xml的最外层标签为 <Element 'GHO' at 0x033D0B70>,也就是说根元素是 GHO。
import xml.etree.ElementTree as ET

# Load the document and keep a handle on its root element.
tree = ET.parse('data-text.xml')
root = tree.getroot()

# Print the root element first, then the attribute/method names that dir()
# reports for it.
for label, value in (
    ('xml的最外层标签为', root),
    ('root的所有方法和属性', dir(root)),
):
    print(label, value)
dir(root) 返回 root 的所有方法和属性的名称列表。
import xml.etree.ElementTree as ET

# Parse the file and get the root element.
tree = ET.parse('data-text.xml')
root = tree.getroot()

# Iterating an Element yields its direct children; collect them into a list
# so they can be printed in one go.
root_children = list(root)
print('查看根元素的子元素', root_children)
print('查看根元素的子元素',list(root))
修正一下,以便观察
下面获取Data元素的子元素,获取root下的Data元素的子元素
import xml.etree.ElementTree as ET

# Parse the file and get the root element.
tree = ET.parse('data-text.xml')
root = tree.getroot()

# find() returns the first direct child of root whose tag is 'Data'.
data = root.find('Data')

# Collect the children of that Data element into a list and show them.
data_children = list(data)
print('获取root下的Data元素的子元素', data_children)
输出超长的列表,以]结束,所以是列表。
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Walk every child of Data, then every child of that child, printing each
# inner element object.  (Loop bodies were not indented in the original.)
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素', item)
得到Dim和Value对象。
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print the text content of every element nested two levels below Data.
# (Loop bodies were not indented in the original.)
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的文本内容', item.text)
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# For each inner element, list its own child elements.
# (Loop bodies were not indented in the original.)
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的列表内容', list(item))
如果节点的标签之间没有值,那么在标签内通常会有属性。
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print the attribute dictionary carried by each inner element.
# (Loop bodies were not indented in the original.)
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的节点属性', item.attrib)
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print only the attribute names (dictionary keys) of each inner element.
# (Loop bodies were not indented in the original.)
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的节点字典的键', item.attrib.keys())
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        # Deliberately kept as written to reproduce the failure: on Python 3,
        # keys() returns a dict_keys view, and indexing it raises
        # TypeError: 'dict_keys' object does not support indexing.
        # Convert the view with list() first if an index is needed.
        lookup_key = (item.attrib.keys())[0]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键', lookup_key)
会产生错误:TypeError: 'dict_keys' object does not support indexing
这是由于 Python 3 中 dict.keys() 返回的是 dict_keys 视图对象,它可以迭代(iterable),但不支持下标索引,所以需要先把它显式地转换成 list:
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        # dict_keys views cannot be indexed on Python 3, so materialise the
        # attribute names as a list before taking the first one.
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键', lookup_key)
有了键的名字,接下来找 键 对应的值。
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        # First attribute name of the element (list() because dict_keys views
        # are not indexable on Python 3).
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        # Value stored under that first attribute.
        rec_key = item.attrib[lookup_key]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键 对应的值', rec_key)
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        # First attribute name of the element (list() because dict_keys views
        # are not indexable on Python 3).
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            # Elements whose first attribute is 'Numeric' get a fixed key and
            # the value of that attribute.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            # Placeholder at this step; only the key is set in this branch.
            rec_key = None
        # Only rec_key is printed here, exactly as in the original snippet.
        print('rec_key,rec_value', rec_key)
下面将 else 分支里的 None 改成实际的值:rec_key 取该元素第一个属性的值,rec_value 取 Code 属性的值。
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        # First attribute name of the element (list() because dict_keys views
        # are not indexable on Python 3).
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            # Fixed key, with the value of the 'Numeric' attribute.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            # Key is the value of the first attribute; the value comes from
            # the element's 'Code' attribute.
            rec_key = item.attrib[lookup_key]
            rec_value = item.attrib['Code']
        print('rec_key,rec_value', rec_key, rec_value)
创建字典
from xml.etree import ElementTree as ET

# Turn every child of the Data element into one dictionary and collect the
# dictionaries in all_data.
tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    # One record per observation, filled from its child elements.
    record = {}
    for item in observation:
        # First attribute name of the element (list() because dict_keys views
        # are not indexable on Python 3).
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            # Fixed key, with the value of the 'Numeric' attribute.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            # Key is the value of the first attribute; the value comes from
            # the element's 'Code' attribute.
            rec_key = item.attrib[lookup_key]
            rec_value = item.attrib['Code']
        # Store the pair once per child element (inside the inner loop).
        record[rec_key] = rec_value
    # Append once per observation, after all of its children were handled.
    all_data.append(record)

# Print the full result a single time, after both loops have finished.
print(all_data)
不知道 对不对。。