# USDA食品数据库 (USDA food database)
from pandas import DataFrame,Series
from pylab import *
import pandas as pd
import json
def groupby(ndata):
    """Plot the per-group median of the 'Zinc, Zn' nutrient.

    The rows of ``ndata`` are grouped by the ``nutrient`` and ``groupp``
    columns, the 0.5 quantile of ``value`` is taken for every group, and
    the ``'Zinc, Zn'`` slice of that result is drawn as a horizontal bar
    chart in ascending order.

    Args:
        ndata: table providing ``nutrient``, ``groupp`` and ``value``
            columns (the merged frame built by ``combination``).
    """
    values_by_group = ndata.groupby(['nutrient', 'groupp'])['value']
    medians = values_by_group.quantile(0.5)
    zinc_by_group = medians['Zinc, Zn']
    zinc_by_group.sort_values().plot(kind='barh')
    # `show` is not defined in this block; it is expected to come from the
    # file's `from pylab import *`.
    show()
def combination(info, nutrients):
    """Merge nutrient rows with food information and pass them to ``groupby``.

    Args:
        info: frame of per-food information; joined on its ``id`` column.
        nutrients: frame of nutrient rows; joined on its ``id`` column.
    """
    # Outer join on 'id': rows present in only one of the frames are kept.
    merged = pd.merge(left=nutrients, right=info, how='outer', on='id')
    groupby(merged)
def changename(nutrients, data):
    """Build the food-info frame, rename clashing columns, then merge.

    Both frames carry ``description`` and ``group`` columns, so each pair
    is renamed before the frames are handed to ``combination``.

    Args:
        nutrients: frame of nutrient rows; its ``description`` and
            ``group`` columns become ``nutrient`` and ``groupq``.
        data: records passed to ``DataFrame``; only the ``description``,
            ``group``, ``id`` and ``manufacturer`` columns are kept, and
            ``description``/``group`` become ``breed``/``groupp``.
    """
    # Only these four columns are taken from the raw records.
    info_keys = ['description', 'group', 'id', 'manufacturer']
    info = DataFrame(data, columns=info_keys)

    # The `copy` keyword of `rename` is deprecated in pandas, so it is not
    # passed; the renamed frames are the same either way.
    info = info.rename(columns={'description': 'breed', 'group': 'groupp'})
    nutrients = nutrients.rename(
        columns={'description': 'nutrient', 'group': 'groupq'}
    )

    print(info)
    combination(info, nutrients)
def lists(data):
    """Flatten every record's nutrient list into one frame and continue.

    Each record's ``'nutrients'`` entry is turned into a frame tagged with
    that record's ``'id'``; the frames are concatenated with a fresh index,
    duplicate rows are dropped, and the result is passed to ``changename``
    together with the original ``data``.

    Args:
        data: iterable of records, each providing ``'nutrients'`` and
            ``'id'`` entries.
    """
    per_food = [
        DataFrame(food['nutrients']).assign(id=food['id'])
        for food in data
    ]
    combined = pd.concat(per_food, ignore_index=True)
    changename(combined.drop_duplicates(), data)
def decode(path):
    """Parse the JSON file at ``path`` and pass the result to ``lists``.

    Args:
        path: filesystem path of the JSON file to read.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file content is not valid JSON.
    """
    # The context manager closes the handle even if parsing fails. JSON
    # interchange text is UTF-8, so do not rely on the platform default
    # encoding.
    with open(path, encoding='utf-8') as source:
        data = json.load(source)
    lists(data)
if __name__ == "__main__":
    # Hard-coded location of the input JSON file.
    path = r"D:\pythonAnalysis\Python for Data Analysis-1st-edition\pydata-book-1st-edition\ch07\foods-2011-10-03.json"
    # Kick off the load -> flatten -> rename -> merge -> plot chain.
    decode(path)