import pandas
# Load the food nutrition table from the CSV in the working directory.
food_info = pandas.read_csv("food_info.csv")

# Show the container type, the table itself, and the per-column dtypes.
# print(...) with a single argument behaves the same on Python 2 and 3,
# unlike the bare `print x` statement, which is a SyntaxError on Python 3.
print(type(food_info))
print(food_info)
print(food_info.dtypes)
<class 'pandas.core.frame.DataFrame'>
NDB_No Shrt_Desc Water_(g) \
0 1001 BUTTER WITH SALT 15.87
1 1002 BUTTER WHIPPED WITH SALT 15.87
2 1003 BUTTER OIL ANHYDROUS 0.24
3 1004 CHEESE BLUE 42.41
4 1005 CHEESE BRICK 41.11
5 1006 CHEESE BRIE 48.42
6 1007 CHEESE CAMEMBERT 51.80
7 1008 CHEESE CARAWAY 39.28
8 1009 CHEESE CHEDDAR 37.10
9 1010 CHEESE CHESHIRE 37.65
10 1011 CHEESE COLBY 38.20
Energ_Kcal Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) \
0 717 0.85 81.11 2.11 0.06
1 717 0.85 81.11 2.11 0.06
2 876 0.28 99.48 0.00 0.00
3 353 21.40 28.74 5.11 2.34
4 371 23.24 29.68 3.18 2.79
5 334 20.75 27.68 2.70 0.45
6 300 19.80 24.26 3.68 0.46
7 376 25.18 29.20 3.28 3.06
8 406 24.04 33.82 3.71 1.33
9 387 23.37 30.60 3.60 4.78
10 394 23.76 32.11 3.36 2.57
Fiber_TD_(g) Sugar_Tot_(g) ... Vit_A_IU Vit_A_RAE \
0 0.0 0.06 ... 2499.0 684.0
1 0.0 0.06 ... 2499.0 684.0
2 0.0 0.00 ... 3069.0 840.0
3 0.0 0.50 ... 721.0 198.0
4 0.0 0.51 ... 1080.0 292.0
5 0.0 0.45 ... 592.0 174.0
6 0.0 0.46 ... 820.0 241.0
7 0.0 NaN ... 1054.0 271.0
8 0.0 0.28 ... 994.0 263.0
9 0.0 NaN ... 985.0 233.0
10 0.0 0.52 ... 994.0 264.0
Vit_E_(mg) Vit_D_mcg Vit_D_IU Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) \
0 2.32 1.5 60.0 7.0 51.368 21.021
1 2.32 1.5 60.0 7.0 50.489 23.426
2 2.80 1.8 73.0 8.6 61.924 28.732
3 0.25 0.5 21.0 2.4 18.669 7.778
4 0.26 0.5 22.0 2.5 18.764 8.598
5 0.24 0.5 20.0 2.3 17.410 8.013
6 0.21 0.4 18.0 2.0 15.259 7.023
7 NaN NaN NaN NaN 18.584 8.275
8 0.78 0.6 24.0 2.9 19.368 8.428
9 NaN NaN NaN NaN 19.475 8.671
10 0.28 0.6 24.0 2.7 20.218 9.280
FA_Poly_(g) Cholestrl_(mg)
0 3.043 215.0
1 3.012 219.0
2 3.694 256.0
3 0.800 75.0
4 0.784 94.0
5 0.826 100.0
6 0.724 72.0
7 0.830 93.0
8 1.433 102.0
9 0.870 103.0
10 0.953 95.0
[8618 rows x 36 columns]
NDB_No int64
Shrt_Desc object
Water_(g) float64
Energ_Kcal int64
Protein_(g) float64
Lipid_Tot_(g) float64
Ash_(g) float64
Carbohydrt_(g) float64
Fiber_TD_(g) float64
Sugar_Tot_(g) float64
Calcium_(mg) float64
Iron_(mg) float64
Magnesium_(mg) float64
Phosphorus_(mg) float64
Potassium_(mg) float64
Sodium_(mg) float64
Zinc_(mg) float64
Copper_(mg) float64
Manganese_(mg) float64
Selenium_(mcg) float64
Vit_C_(mg) float64
Thiamin_(mg) float64
Riboflavin_(mg) float64
Niacin_(mg) float64
Vit_B6_(mg) float64
Vit_B12_(mcg) float64
Vit_A_IU float64
Vit_A_RAE float64
Vit_E_(mg) float64
Vit_D_mcg float64
Vit_D_IU float64
Vit_K_(mcg) float64
FA_Sat_(g) float64
FA_Mono_(g) float64
FA_Poly_(g) float64
Cholestrl_(mg) float64
dtype: object
# head() with no argument returns the leading rows (five in the recorded
# output); head(3) limits that to three.
first_rows = food_info.head()
print(first_rows)
print(food_info.head(3))

# Column labels and the (rows, columns) shape of the table.
# Function-call form of print keeps the cell valid on Python 2 and 3.
print(food_info.columns)
print(food_info.shape)
NDB_No Shrt_Desc Water_(g) Energ_Kcal Protein_(g) \
0 1001 BUTTER WITH SALT 15.87 717 0.85
1 1002 BUTTER WHIPPED WITH SALT 15.87 717 0.85
2 1003 BUTTER OIL ANHYDROUS 0.24 876 0.28
3 1004 CHEESE BLUE 42.41 353 21.40
4 1005 CHEESE BRICK 41.11 371 23.24
Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) Fiber_TD_(g) Sugar_Tot_(g) \
0 81.11 2.11 0.06 0.0 0.06
1 81.11 2.11 0.06 0.0 0.06
2 99.48 0.00 0.00 0.0 0.00
3 28.74 5.11 2.34 0.0 0.50
4 29.68 3.18 2.79 0.0 0.51
... Vit_A_IU Vit_A_RAE Vit_E_(mg) Vit_D_mcg Vit_D_IU \
0 ... 2499.0 684.0 2.32 1.5 60.0
1 ... 2499.0 684.0 2.32 1.5 60.0
2 ... 3069.0 840.0 2.80 1.8 73.0
3 ... 721.0 198.0 0.25 0.5 21.0
4 ... 1080.0 292.0 0.26 0.5 22.0
Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g) Cholestrl_(mg)
0 7.0 51.368 21.021 3.043 215.0
1 7.0 50.489 23.426 3.012 219.0
2 8.6 61.924 28.732 3.694 256.0
3 2.4 18.669 7.778 0.800 75.0
4 2.5 18.764 8.598 0.784 94.0
[5 rows x 36 columns]
# .loc[label] on a single row label returns that row as a Series.
# Function-call form of print keeps the cell valid on Python 2 and 3.
print(food_info.loc[0])

# Left as a bare expression so the notebook displays it as the cell's value.
food_info.loc[6]
NDB_No 1001
Shrt_Desc BUTTER WITH SALT
Water_(g) 15.87
Energ_Kcal 717
Protein_(g) 0.85
Lipid_Tot_(g) 81.11
Ash_(g) 2.11
Carbohydrt_(g) 0.06
Fiber_TD_(g) 0
Sugar_Tot_(g) 0.06
Calcium_(mg) 24
Iron_(mg) 0.02
Magnesium_(mg) 2
Phosphorus_(mg) 24
Potassium_(mg) 24
Sodium_(mg) 643
Zinc_(mg) 0.09
Copper_(mg) 0
Manganese_(mg) 0
Selenium_(mcg) 1
Vit_C_(mg) 0
Thiamin_(mg) 0.005
Riboflavin_(mg) 0.034
Niacin_(mg) 0.042
Vit_B6_(mg) 0.003
Vit_B12_(mcg) 0.17
Vit_A_IU 2499
Vit_A_RAE 684
Vit_E_(mg) 2.32
Vit_D_mcg 1.5
Vit_D_IU 60
Vit_K_(mcg) 7
FA_Sat_(g) 51.368
FA_Mono_(g) 21.021
FA_Poly_(g) 3.043
Cholestrl_(mg) 215
Name: 0, dtype: object
NDB_No 1007
Shrt_Desc CHEESE CAMEMBERT
Water_(g) 51.8
Energ_Kcal 300
Protein_(g) 19.8
Lipid_Tot_(g) 24.26
Ash_(g) 3.68
Carbohydrt_(g) 0.46
Fiber_TD_(g) 0
Sugar_Tot_(g) 0.46
Calcium_(mg) 388
Iron_(mg) 0.33
Magnesium_(mg) 20
Phosphorus_(mg) 347
Potassium_(mg) 187
Sodium_(mg) 842
Zinc_(mg) 2.38
Copper_(mg) 0.021
Manganese_(mg) 0.038
Selenium_(mcg) 14.5
Vit_C_(mg) 0
Thiamin_(mg) 0.028
Riboflavin_(mg) 0.488
Niacin_(mg) 0.63
Vit_B6_(mg) 0.227
Vit_B12_(mcg) 1.3
Vit_A_IU 820
Vit_A_RAE 241
Vit_E_(mg) 0.21
Vit_D_mcg 0.4
Vit_D_IU 18
Vit_K_(mcg) 2
FA_Sat_(g) 15.259
FA_Mono_(g) 7.023
FA_Poly_(g) 0.724
Cholestrl_(mg) 72
Name: 6, dtype: object
# Data type of every column in the table.
print(food_info.dtypes)
NDB_No int64
Shrt_Desc object
Water_(g) float64
Energ_Kcal int64
Protein_(g) float64
Lipid_Tot_(g) float64
Ash_(g) float64
Carbohydrt_(g) float64
Fiber_TD_(g) float64
Sugar_Tot_(g) float64
Calcium_(mg) float64
Iron_(mg) float64
Magnesium_(mg) float64
Phosphorus_(mg) float64
Potassium_(mg) float64
Sodium_(mg) float64
Zinc_(mg) float64
Copper_(mg) float64
Manganese_(mg) float64
Selenium_(mcg) float64
Vit_C_(mg) float64
Thiamin_(mg) float64
Riboflavin_(mg) float64
Niacin_(mg) float64
Vit_B6_(mg) float64
Vit_B12_(mcg) float64
Vit_A_IU float64
Vit_A_RAE float64
Vit_E_(mg) float64
Vit_D_mcg float64
Vit_D_IU float64
Vit_K_(mcg) float64
FA_Sat_(g) float64
FA_Mono_(g) float64
FA_Poly_(g) float64
Cholestrl_(mg) float64
dtype: object
# Label-based slice: .loc includes both endpoints, so this selects rows 3..6.
# NOTE: only the final expression of the cell appears in the recorded output,
# so this result is computed but not shown.
food_info.loc[3:6]

# Select an arbitrary set of rows by passing a list of labels.
wanted_labels = [2, 5, 10]
food_info.loc[wanted_labels]
NDB_No Shrt_Desc Water_(g) Energ_Kcal Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) Fiber_TD_(g) Sugar_Tot_(g) ... Vit_A_IU Vit_A_RAE Vit_E_(mg) Vit_D_mcg Vit_D_IU Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g) Cholestrl_(mg) 2 1003 BUTTER OIL ANHYDROUS 0.24 876 0.28 99.48 0.00 0.00 0.0 0.00 ... 3069.0 840.0 2.80 1.8 73.0 8.6 61.924 28.732 3.694 256.0 5 1006 CHEESE BRIE 48.42 334 20.75 27.68 2.70 0.45 0.0 0.45 ... 592.0 174.0 0.24 0.5 20.0 2.3 17.410 8.013 0.826 100.0 10 1011 CHEESE COLBY 38.20 394 23.76 32.11 3.36 2.57 0.0 0.52 ... 994.0 264.0 0.28 0.6 24.0 2.7 20.218 9.280 0.953 95.0
3 rows × 36 columns
# Indexing the DataFrame with a column label returns that column as a Series.
ndb_col = food_info["NDB_No"]
# Function-call form of print keeps the cell valid on Python 2 and 3.
print(ndb_col)

# The label can equally be supplied through a variable.
col_name = "NDB_No"
ndb_col = food_info[col_name]
0 1001
1 1002
2 1003
3 1004
4 1005
5 1006
6 1007
7 1008
8 1009
9 1010
10 1011
Name: NDB_No, Length: 8618, dtype: int64
# Indexing with a list of labels returns a DataFrame holding just those columns.
columns = ["Zinc_(mg)", "Copper_(mg)"]
zinc_copper = food_info[columns]
# Function-call form of print keeps the cell valid on Python 2 and 3.
print(zinc_copper)
Zinc_(mg) Copper_(mg)
0 0.09 0.000
1 0.05 0.016
2 0.01 0.001
3 2.66 0.040
4 2.60 0.024
5 2.38 0.019
6 2.38 0.021
7 2.94 0.024
8 3.43 0.056
9 2.79 0.042
10 3.07 0.042
[8618 rows x 2 columns]
print(food_info.columns)

# Collect every column whose label ends in "(g)", i.e. values measured in grams.
# (The original loop body had lost its indentation and would not parse.)
col_names = food_info.columns.tolist()
gram_columns = [c for c in col_names if c.endswith("(g)")]

# Restrict the table to those columns and preview the first three rows.
gram_df = food_info[gram_columns]
print(gram_df.head(3))
Index([u'NDB_No', u'Shrt_Desc', u'Water_(g)', u'Energ_Kcal', u'Protein_(g)',
u'Lipid_Tot_(g)', u'Ash_(g)', u'Carbohydrt_(g)', u'Fiber_TD_(g)',
u'Sugar_Tot_(g)', u'Calcium_(mg)', u'Iron_(mg)', u'Magnesium_(mg)',
u'Phosphorus_(mg)', u'Potassium_(mg)', u'Sodium_(mg)', u'Zinc_(mg)',
u'Copper_(mg)', u'Manganese_(mg)', u'Selenium_(mcg)', u'Vit_C_(mg)',
u'Thiamin_(mg)', u'Riboflavin_(mg)', u'Niacin_(mg)', u'Vit_B6_(mg)',
u'Vit_B12_(mcg)', u'Vit_A_IU', u'Vit_A_RAE', u'Vit_E_(mg)',
u'Vit_D_mcg', u'Vit_D_IU', u'Vit_K_(mcg)', u'FA_Sat_(g)',
u'FA_Mono_(g)', u'FA_Poly_(g)', u'Cholestrl_(mg)'],
dtype='object')
Water_(g) Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) \
0 15.87 0.85 81.11 2.11 0.06
1 15.87 0.85 81.11 2.11 0.06
2 0.24 0.28 99.48 0.00 0.00
Fiber_TD_(g) Sugar_Tot_(g) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g)
0 0.0 0.06 51.368 21.021 3.043
1 0.0 0.06 50.489 23.426 3.012
2 0.0 0.00 61.924 28.732 3.694
import pandas
# Reload the food table so this cell can run on its own.
food_info = pandas.read_csv("food_info.csv")

# Pick out the columns whose label ends in "(g)".
# (The original loop body had lost its indentation and would not parse.)
col_names = food_info.columns.tolist()
gram_columns = [c for c in col_names if c.endswith("(g)")]

# Arithmetic on a DataFrame is element-wise: every value is divided by 10,
# and missing values stay NaN (see the recorded output).
print(food_info[gram_columns] / 10)
Water_(g) Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) \
0 1.587 0.085 8.111 0.211 0.006
1 1.587 0.085 8.111 0.211 0.006
2 0.024 0.028 9.948 0.000 0.000
3 4.241 2.140 2.874 0.511 0.234
4 4.111 2.324 2.968 0.318 0.279
5 4.842 2.075 2.768 0.270 0.045
6 5.180 1.980 2.426 0.368 0.046
7 3.928 2.518 2.920 0.328 0.306
8 3.710 2.404 3.382 0.371 0.133
9 3.765 2.337 3.060 0.360 0.478
10 3.820 2.376 3.211 0.336 0.257
Fiber_TD_(g) Sugar_Tot_(g) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g)
0 0.00 0.006 5.1368 2.1021 0.3043
1 0.00 0.006 5.0489 2.3426 0.3012
2 0.00 0.000 6.1924 2.8732 0.3694
3 0.00 0.050 1.8669 0.7778 0.0800
4 0.00 0.051 1.8764 0.8598 0.0784
5 0.00 0.045 1.7410 0.8013 0.0826
6 0.00 0.046 1.5259 0.7023 0.0724
7 0.00 NaN 1.8584 0.8275 0.0830
8 0.00 0.028 1.9368 0.8428 0.1433
9 0.00 NaN 1.9475 0.8671 0.0870
10 0.00 0.052 2.0218 0.9280 0.0953
# Load the Titanic training set and pull out the "Age" column.
titanic_train = pandas.read_csv("titanic_train.csv")
age = titanic_train["Age"]

# Boolean mask that is True wherever the age is missing (NaN).
age_is_null = pandas.isnull(age)

# Keep only the missing entries and count them.
numb = age[age_is_null]
# Function-call form of print keeps the cell valid on Python 2 and 3.
print(len(numb))
177
# Keep only the rows whose age is present. `~mask` is the idiomatic way to
# invert a boolean Series and is equivalent to `mask == False`.
good_ages = titanic_train["Age"][~age_is_null]

# Mean computed by hand over the non-missing ages, so NaN cannot poison the sum.
correct_mean_age = sum(good_ages) / len(good_ages)
# Function-call form of print keeps the cell valid on Python 2 and 3.
print(correct_mean_age)
29.6991176471
# Built-in Series mean over the non-missing ages; the recorded output matches
# the hand-computed value above to the printed precision.
good_ages.mean()
29.69911764705882
# Summarise "Age" per passenger class: one row per distinct "Pclass" value,
# aggregated with pivot_table's default aggregation function.
titanic_train.pivot_table(values="Age", index="Pclass")
Age Pclass 1 38.233441 2 29.877630 3 25.140620