python 数据分析案例(一)

数据来源:https://github.com/chrisrui/pydata-book

import json
import pandas as pd
import matplotlib.pyplot as plt

# Load the JSON database from disk.  A context manager closes the file handle
# as soon as parsing is done instead of leaving it open until garbage
# collection.
with open('/home/baba/database.json') as db_file:
    db = json.load(db_file)
len(db)

db

 

  'group': 'Composition'},
   {'value': 4.74,
    'units': 'g',
    'description': 'Carbohydrate, by difference',
    'group': 'Composition'},
   {'value': 0.8, 'units': 'g', 'description': 'Ash', 'group': 'Other'},
   {'value': 63.0,
    'units': 'kcal',
    'description': 'Energy',
    'group': 'Energy'},
   {'value': 87.67,
    'units': 'g',
    'description': 'Water',
    'group': 'Composition'},
   {'value': 264.0, 'units': 'kJ', 'description': 'Energy', 'group': 'Energy'},
   {'value': 0.0,
    'units': 'g',

 

 

# Inspect the first record: its top-level keys, then its first nutrient entry.
first_food = db[0]
first_food.keys()



first_food['nutrients'][0]


# Tabulate the first record's nutrient entries and show the first seven rows.
nutrients = pd.DataFrame(first_food['nutrients'])
nutrients[:7]



# Build a table with one row per record in `db`, keeping only the fields
# listed in `info_keys`.
info_keys = ['description', 'group', 'id', 'manufacturer']
info_db = pd.DataFrame(db, columns=info_keys)
info_db[:5]
# NOTE(review): the table below had been pasted inside the slice brackets
# above, which left the bracket open and made the cell a syntax error.  Its
# columns (value/units/description/group) and seven rows suggest it is the
# printed output of `nutrients[:7]`, not of `info_db[:5]` -- confirm.
#
#       value  units  description                  group
#   0   25.18  g      Protein                      Composition
#   1   29.20  g      Total lipid (fat)            Composition
#   2    3.06  g      Carbohydrate, by difference  Composition
#   3    3.28  g      Ash                          Other
#   4  376.00  kcal   Energy                       Energy
#   5   39.28  g      Water                        Composition
#   6 1573.00  kJ     Energy                       Energy
info_db.info()

 

# Ten most frequent values of the `group` column (value_counts sorts by
# descending frequency).  The Series method is used because the top-level
# pd.value_counts helper is deprecated in recent pandas releases.
info_db['group'].value_counts()[:10]

 

# Stack the nutrient entries of every record into one long table, tagging each
# row with the 'id' of the record it came from.
nutrients_list = pd.concat(
    [pd.DataFrame(rec['nutrients']).assign(id=rec['id']) for rec in db],
    ignore_index=True,
)
nutrients_list

# Count fully duplicated rows, then drop them.
nutrients_list.duplicated().sum()

nutrients_list = nutrients_list.drop_duplicates()


# Both tables carry 'description' and 'group' columns; rename them so the two
# sets stay distinguishable once the tables are combined.
col1 = dict(description='food', group='fgroup')
info_db = info_db.rename(columns=col1, copy=False)
info_db.info()


co2 = dict(description='nutrient', group='nutgroup')
nutrients_list = nutrients_list.rename(columns=co2, copy=False)
nutrients_list

# Outer-join the nutrient rows with the per-food info on 'id', so rows from
# either table without a match are kept.
ndata = pd.merge(left=nutrients_list, right=info_db, how='outer', on='id')
ndata.info()
# Look at a single merged row, selected by position.
ndata.iloc[30000]


# Horizontal bar chart of the 0.5-quantile 'Zinc, Zn' value per food group,
# sorted ascending.
fig = plt.figure()
values_by_nutrient_and_group = ndata.groupby(['nutrient', 'fgroup'])['value']
result = values_by_nutrient_and_group.quantile(0.5)
zinc_by_group = result['Zinc, Zn']
zinc_by_group.sort_values().plot(kind='barh')


def get_maximum(x):
    """Return the row of group frame ``x`` with the largest 'value'."""
    return x.loc[x['value'].idxmax()]


def get_minimum(x):
    """Return the row of group frame ``x`` with the smallest 'value'."""
    return x.loc[x['value'].idxmin()]


# For every (nutgroup, nutrient) pair, pick the row with the highest value and
# keep only that value and the food it belongs to.
by_nutrient = ndata.groupby(['nutgroup', 'nutrient'])
max_foods = by_nutrient.apply(get_maximum)[['value', 'food']]
# Shorten food names to at most 50 characters.  The previous code assigned the
# frame slice `max_foods[:50]` (first 50 rows, two columns) to the 'food'
# column, which is not a string truncation and does not fit a single column.
max_foods['food'] = max_foods['food'].str[:50]

max_foods.loc['Amino Acids']['food']

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值