Python学习——Pandas

import  pandas as pd
import numpy as np
df=pd.read_csv("export.csv")
df
分类  食物名  重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
0  谷物类  稻米  26  2.192723  0.192691  0.132008  19.879120  0.768167  1.976958
1  谷物类  稻谷(早籼)  16  2.481894  0.551532  0.350975  18.752089  0.401114  3.259053
2  谷物类  大麦(元麦)  29  2.990228  0.410423  2.902280  18.586319  0.469055  3.811075
3  谷物类  大黄米(黍)  26  3.507163  0.696275  0.902579  17.432665  0.438395  7.736390
4  谷物类  方便面  19  1.811441  4.023305  0.133475  11.612288  218.135593  4.766949
...  ...  ...  ...  ...  ...  ...  ...  ...  ...
676  豆乳类  青豆(青大豆)  24  2.292225  8.348525  3.860590  3.040214  2.434584  0.434316
677  豆乳类  酸豆乳  134  113.507463  2.955224  1.611940  0.000000  1.491045  24.985075
678  豆乳类  素大肠  59  37.058824  10.647059  2.117647  0.588235  0.000000  85.117647
679  豆乳类  素火腿  43  23.459716  8.146919  5.630332  0.383886  11.085782  288.298578
680  豆乳类  稻米3(粳,特级)  27  1.970000  0.110000  0.110000  20.290000  1.670000  6.470000

681 rows × 9 columns

df['重量']
0       26
1       16
2       29
3       26
4       19
      ... 
676     24
677    134
678     59
679     43
680     27
Name: 重量, Length: 681, dtype: int64
修改索引(可以指定某一列作为索引,方便查看);默认索引是 0, 1, 2, ……
df=df.set_index('食物名')
df
分类  重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
食物名
稻米  谷物类  26  2.192723  0.192691  0.132008  19.879120  0.768167  1.976958
稻谷(早籼)  谷物类  16  2.481894  0.551532  0.350975  18.752089  0.401114  3.259053
大麦(元麦)  谷物类  29  2.990228  0.410423  2.902280  18.586319  0.469055  3.811075
大黄米(黍)  谷物类  26  3.507163  0.696275  0.902579  17.432665  0.438395  7.736390
方便面  谷物类  19  1.811441  4.023305  0.133475  11.612288  218.135593  4.766949
...  ...  ...  ...  ...  ...  ...  ...  ...
青豆(青大豆)  豆乳类  24  2.292225  8.348525  3.860590  3.040214  2.434584  0.434316
酸豆乳  豆乳类  134  113.507463  2.955224  1.611940  0.000000  1.491045  24.985075
素大肠  豆乳类  59  37.058824  10.647059  2.117647  0.588235  0.000000  85.117647
素火腿  豆乳类  43  23.459716  8.146919  5.630332  0.383886  11.085782  288.298578
稻米3(粳,特级)  豆乳类  27  1.970000  0.110000  0.110000  20.290000  1.670000  6.470000

681 rows × 8 columns

df['重量']
食物名
稻米            26
稻谷(早籼)        16
大麦(元麦)        29
大黄米(黍)        26
方便面           19
            ... 
青豆(青大豆)       24
酸豆乳          134
素大肠           59
素火腿           43
稻米3(粳,特级)     27
Name: 重量, Length: 681, dtype: int64
获取两列
df[['重量','钠']]
重量  钠
食物名
稻米  26  0.768167
稻谷(早籼)  16  0.401114
大麦(元麦)  29  0.469055
大黄米(黍)  26  0.438395
方便面  19  218.135593
...  ...  ...
青豆(青大豆)  24  2.434584
酸豆乳  134  1.491045
素大肠  59  0.000000
素火腿  43  11.085782
稻米3(粳,特级)  27  1.670000

681 rows × 2 columns

# 不能直接用 df[行标签] 来获取某一行或指定位置的元素
# df['稻米'] 和 df['稻米']['重量'] 这两种写法都会报错(df[...] 是按列名查找的,而 '稻米' 是行标签)

# df[...] 只能获取某一列;可以先取出这一列,再用行标签在该列中取值
weight=df['重量']
weight['稻米']
26
loc 用 label(标签)来定位,按标签切片时包含末端标签
iloc 用 position(位置)来定位,按位置切片时不包含末端位置
df.loc['稻米']['重量']
26
df.loc['稻米':'方便面']
分类  重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
食物名
稻米  谷物类  26  2.192723  0.192691  0.132008  19.879120  0.768167  1.976958
稻谷(早籼)  谷物类  16  2.481894  0.551532  0.350975  18.752089  0.401114  3.259053
大麦(元麦)  谷物类  29  2.990228  0.410423  2.902280  18.586319  0.469055  3.811075
大黄米(黍)  谷物类  26  3.507163  0.696275  0.902579  17.432665  0.438395  7.736390
方便面  谷物类  19  1.811441  4.023305  0.133475  11.612288  218.135593  4.766949
df.iloc[0]
分类           谷物类
重量            26
蛋白质      2.19272
脂肪      0.192691
膳食纤维    0.132008
碳水化物     19.8791
钠       0.768167
钙        1.97696
Name: 稻米, dtype: object
df.iloc[0:3]
分类  重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
食物名
稻米  谷物类  26  2.192723  0.192691  0.132008  19.879120  0.768167  1.976958
稻谷(早籼)  谷物类  16  2.481894  0.551532  0.350975  18.752089  0.401114  3.259053
大麦(元麦)  谷物类  29  2.990228  0.410423  2.902280  18.586319  0.469055  3.811075
##获取指定列
df.iloc[0:3,1:3]
重量  蛋白质
食物名
稻米  26  2.192723
稻谷(早籼)  16  2.481894
大麦(元麦)  29  2.990228

筛选

df[df['分类']=='肉类']
##中括号里面是一个布尔(bool)类型的 Series,把它传回 df 中即可(只会保留值为 True 的行)
分类  重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
食物名
肉类3014.2076343.7820827.7249120.00.089103279.045720
叉烧肉肉类3215.8709687.6774195.4516130.00.219355264.129032
方腿肉类7756.84615412.4615383.8461540.00.115385326.538462
宫爆肉丁(罐头)肉类2711.9196434.7410717.3928570.00.404464126.401786
狗肉肉类6258.96551713.0344833.5689660.01.08620736.775862
...........................
田鸡(青蛙)肉类3676.83871019.8387101.1612900.00.53225811.419355
田鸡腿(青蛙腿)肉类4093.07594913.4430381.5949370.00.660759245.164557
蝎子肉类5124.61016913.3220342.3898310.03.85932258.830508
中国鲎肉类97120.14285714.7142862.1428570.03.2857140.000000
炸鸡(肯德鸡)肉类2315.9354846.5483875.5806450.02.077419243.548387

262 rows × 8 columns

df[df['分类']=='肉类']['重量'].mean()
56.36641221374046

基本统计指标

df.describe()
重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
count  681.000000  681.000000  681.000000  681.000000  681.000000  681.000000  681.000000
mean  122.756241  112.119549  7.082490  2.380822  5.157334  34.052483  557.301880
std  211.080287  223.202680  5.983232  8.329738  7.563437  561.686112  2684.146695
min  7.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
25%  30.000000  5.518033  1.756098  0.297794  0.000000  0.123913  6.000000
50%  47.000000  36.000000  5.727273  1.000000  1.139241  0.916981  27.900000
75%  136.000000  105.300000  11.460000  2.855573  7.702328  3.323077  104.610390
max  3000.000000  2892.000000  45.000000  180.972973  57.600000  14142.913040  32286.666670

groupby

df.groupby('分类').sum()
重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
分类
水果类  7870  7749.405556  104.545800  43.734145  212.281453  142.063666  1145.775009
肉类  14768  13889.115836  3110.533468  915.063512  0.000000  384.592469  49096.447432
蔬菜类  49439  51871.136219  1150.082174  162.789894  1095.084777  14691.616430  324695.914233
谷物类  8380  378.092086  231.878966  401.588553  2141.576094  7733.783997  2057.292955
豆乳类  3140  2465.662844  226.135330  98.163964  63.201943  237.684029  2527.150658

协方差 相关系数

df.cov()
重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
重量  44554.887553  41182.411708  233.306873  323.478724  413.007569  13629.479642  8.990307e+04
蛋白质  41182.411708  49819.436385  289.770895  -81.337915  393.438141  -137.169258  6.135819e+04
脂肪  233.306873  289.770895  35.799061  0.114819  -14.506120  -37.135525  5.362664e+02
膳食纤维  323.478724  -81.337915  0.114819  69.384535  -4.586858  594.002972  -5.682174e+02
碳水化物  413.007569  393.438141  -14.506120  -4.586858  57.205577  20.115411  -1.969337e+02
钠  13629.479642  -137.169258  -37.135525  594.002972  20.115411  315491.288159  2.764420e+05
钙  89903.074886  61358.194926  536.266377  -568.217363  -196.933720  276441.965879  7.204643e+06
df.corr()
重量  蛋白质  脂肪  膳食纤维  碳水化物  钠  钙
重量  1.000000  0.874107  0.184733  0.183978  0.258697  0.114958  0.158679
蛋白质  0.874107  1.000000  0.216980  -0.043748  0.233055  -0.001094  0.102416
脂肪  0.184733  0.216980  1.000000  0.002304  -0.320550  -0.011050  0.033392
膳食纤维  0.183978  -0.043748  0.002304  1.000000  -0.072806  0.126959  -0.025414
碳水化物  0.258697  0.233055  -0.320550  -0.072806  1.000000  0.004735  -0.009701
钠  0.114958  -0.001094  -0.011050  0.126959  0.004735  1.000000  0.183360
钙  0.158679  0.102416  0.033392  -0.025414  -0.009701  0.183360  1.000000

统计一样的值出现的次数

df.value_counts('分类')
分类
肉类     262
蔬菜类    175
谷物类    134
水果类     77
豆乳类     33
dtype: int64

升序

df.value_counts('分类',ascending=True)
分类
豆乳类     33
水果类     77
谷物类    134
蔬菜类    175
肉类     262
dtype: int64
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值