import pandas #导入pandas库
# Load the food nutrition table from disk; read_csv returns a DataFrame.
food_info = pandas.read_csv("food_info.csv")
print(type(food_info))  # confirm the object type returned by read_csv
print(food_info.dtypes)  # dtypes is an attribute (not a function): one dtype per column
# help() writes the documentation itself and returns None, so wrapping it in
# print() would only append a stray "None" line after the help text.
help(pandas.read_csv)
#输出
<class 'pandas.core.frame.DataFrame'> #pandas的核心格式为DataFrame
NDB_No int64
Shrt_Desc object #在pandas中,string类型的数据显示为object格式
Water_(g) float64
Energ_Kcal int64
Protein_(g) float64
Lipid_Tot_(g) float64
Ash_(g) float64
Carbohydrt_(g) float64
Fiber_TD_(g) float64
Sugar_Tot_(g) float64
Calcium_(mg) float64
Iron_(mg) float64
Magnesium_(mg) float64
Phosphorus_(mg) float64
Potassium_(mg) float64
Sodium_(mg) float64
Zinc_(mg) float64
一、pandas中的基本操作
food_info.head(n=3)  # head returns the first n rows, counted from the top
#输出
NDB_No Shrt_Desc Water_(g) Energ_Kcal Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) Fiber_TD_(g) Sugar_Tot_(g) ... Vit_A_IU Vit_A_RAE Vit_E_(mg) Vit_D_mcg Vit_D_IU Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g) Cholestrl_(mg)
0 1001 BUTTER WITH SALT 15.87 717 0.85 81.11 2.11 0.06 0.0 0.06 ... 2499.0 684.0 2.32 1.5 60.0 7.0 51.368 21.021 3.043 215.0
1 1002 BUTTER WHIPPED WITH SALT 15.87 717 0.85 81.11 2.11 0.06 0.0 0.06 ... 2499.0 684.0 2.32 1.5 60.0 7.0 50.489 23.426 3.012 219.0
2 1003 BUTTER OIL ANHYDROUS 0.24 876 0.28 99.48 0.00 0.00 0.0 0.00 ... 3069.0 840.0 2.80 1.8 73.0 8.6 61.924 28.732 3.694 256.0
food_info.tail(n=4)  # tail returns the last n rows, kept in their original order
#输出
NDB_No Shrt_Desc Water_(g) Energ_Kcal Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) Fiber_TD_(g) Sugar_Tot_(g) ... Vit_A_IU Vit_A_RAE Vit_E_(mg) Vit_D_mcg Vit_D_IU Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g) Cholestrl_(mg)
8614 90240 SCALLOP (BAY&SEA) CKD STMD 70.25 111 20.54 0.84 2.97 5.41 0.0 0.0 ... 5.0 2.0 0.0 0.0 2.0 0.0 0.218 0.082 0.222 41.0
8615 90480 SYRUP CANE 26.00 269 0.00 0.00 0.86 73.14 0.0 73.2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.000 0.000 0.000 0.0
8616 90560 SNAIL RAW 79.20 90 16.10 1.40 1.30 2.00 0.0 0.0 ... 100.0 30.0 5.0 0.0 0.0 0.1 0.361 0.259 0.252 50.0
8617 93600 TURTLE GREEN RAW 78.50 89 19.80 0.50 1.20 0.00 0.0 0.0 ... 100.0 30.0 0.5 0.0 0.0 0.1 0.127 0.088 0.170 50.0
print(food_info.columns)  # columns is the Index holding the column labels
#输出
Index(['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)',
'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)',
'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)',
'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)',
'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)',
'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)',
'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg',
'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)',
'Cholestrl_(mg)'],
dtype='object')
print(food_info.shape)  # shape is the (row count, column count) tuple
#输出
(8618, 36)
print(food_info.loc[0])  # loc selects the row whose index label is 0
#输出
NDB_No 1001
Shrt_Desc BUTTER WITH SALT
Water_(g) 15.87
Energ_Kcal 717
Protein_(g) 0.85
Lipid_Tot_(g) 81.11
Ash_(g) 2.11
Carbohydrt_(g) 0.06
Fiber_TD_(g) 0
Sugar_Tot_(g) 0.06
Calcium_(mg) 24
Iron_(mg) 0.02
Magnesium_(mg) 2
Phosphorus_(mg) 24
Potassium_(mg) 24
Sodium_(mg) 643
Zinc_(mg) 0.09
Copper_(mg) 0
Manganese_(mg) 0
Selenium_(mcg) 1
Vit_C_(mg) 0
Thiamin_(mg) 0.005
Riboflavin_(mg) 0.034
Niacin_(mg) 0.042
Vit_B6_(mg) 0.003
Vit_B12_(mcg) 0.17
Vit_A_IU 2499
Vit_A_RAE 684
Vit_E_(mg) 2.32
Vit_D_mcg 1.5
Vit_D_IU 60
Vit_K_(mcg) 7
FA_Sat_(g) 51.368
FA_Mono_(g) 21.021
FA_Poly_(g) 3.043
Cholestrl_(mg) 215
Name: 0, dtype: object
print(food_info.loc[3:6])  # loc also accepts a label range; both endpoints are included