pandas基本命令

import pandas
# Read food_info.csv into a DataFrame.
food_info = pandas.read_csv("food_info.csv")
# Show the container type, then the table itself, then each column's dtype.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(type(food_info))
print(food_info)
print(food_info.dtypes)
<class 'pandas.core.frame.DataFrame'>
      NDB_No                                          Shrt_Desc  Water_(g)  \
0       1001                                   BUTTER WITH SALT      15.87   
1       1002                           BUTTER WHIPPED WITH SALT      15.87   
2       1003                               BUTTER OIL ANHYDROUS       0.24   
3       1004                                        CHEESE BLUE      42.41   
4       1005                                       CHEESE BRICK      41.11   
5       1006                                        CHEESE BRIE      48.42   
6       1007                                   CHEESE CAMEMBERT      51.80   
7       1008                                     CHEESE CARAWAY      39.28   
8       1009                                     CHEESE CHEDDAR      37.10   
9       1010                                    CHEESE CHESHIRE      37.65   
10      1011                                       CHEESE COLBY      38.20   


      Energ_Kcal  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  \
0            717         0.85          81.11     2.11            0.06   
1            717         0.85          81.11     2.11            0.06   
2            876         0.28          99.48     0.00            0.00   
3            353        21.40          28.74     5.11            2.34   
4            371        23.24          29.68     3.18            2.79   
5            334        20.75          27.68     2.70            0.45   
6            300        19.80          24.26     3.68            0.46   
7            376        25.18          29.20     3.28            3.06   
8            406        24.04          33.82     3.71            1.33   
9            387        23.37          30.60     3.60            4.78   
10           394        23.76          32.11     3.36            2.57   


      Fiber_TD_(g)  Sugar_Tot_(g)       ...        Vit_A_IU  Vit_A_RAE  \
0              0.0           0.06       ...          2499.0      684.0   
1              0.0           0.06       ...          2499.0      684.0   
2              0.0           0.00       ...          3069.0      840.0   
3              0.0           0.50       ...           721.0      198.0   
4              0.0           0.51       ...          1080.0      292.0   
5              0.0           0.45       ...           592.0      174.0   
6              0.0           0.46       ...           820.0      241.0   
7              0.0            NaN       ...          1054.0      271.0   
8              0.0           0.28       ...           994.0      263.0   
9              0.0            NaN       ...           985.0      233.0   
10             0.0           0.52       ...           994.0      264.0   


      Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  FA_Sat_(g)  FA_Mono_(g)  \
0           2.32        1.5      60.0          7.0      51.368       21.021   
1           2.32        1.5      60.0          7.0      50.489       23.426   
2           2.80        1.8      73.0          8.6      61.924       28.732   
3           0.25        0.5      21.0          2.4      18.669        7.778   
4           0.26        0.5      22.0          2.5      18.764        8.598   
5           0.24        0.5      20.0          2.3      17.410        8.013   
6           0.21        0.4      18.0          2.0      15.259        7.023   
7            NaN        NaN       NaN          NaN      18.584        8.275   
8           0.78        0.6      24.0          2.9      19.368        8.428   
9            NaN        NaN       NaN          NaN      19.475        8.671   
10          0.28        0.6      24.0          2.7      20.218        9.280   


      FA_Poly_(g)  Cholestrl_(mg)  
0           3.043           215.0  
1           3.012           219.0  
2           3.694           256.0  
3           0.800            75.0  
4           0.784            94.0  
5           0.826           100.0  
6           0.724            72.0  
7           0.830            93.0  
8           1.433           102.0  
9           0.870           103.0  
10          0.953            95.0  


[8618 rows x 36 columns]
NDB_No               int64
Shrt_Desc           object
Water_(g)          float64
Energ_Kcal           int64
Protein_(g)        float64
Lipid_Tot_(g)      float64
Ash_(g)            float64
Carbohydrt_(g)     float64
Fiber_TD_(g)       float64
Sugar_Tot_(g)      float64
Calcium_(mg)       float64
Iron_(mg)          float64
Magnesium_(mg)     float64
Phosphorus_(mg)    float64
Potassium_(mg)     float64
Sodium_(mg)        float64
Zinc_(mg)          float64
Copper_(mg)        float64
Manganese_(mg)     float64
Selenium_(mcg)     float64
Vit_C_(mg)         float64
Thiamin_(mg)       float64
Riboflavin_(mg)    float64
Niacin_(mg)        float64
Vit_B6_(mg)        float64
Vit_B12_(mcg)      float64
Vit_A_IU           float64
Vit_A_RAE          float64
Vit_E_(mg)         float64
Vit_D_mcg          float64
Vit_D_IU           float64
Vit_K_(mcg)        float64
FA_Sat_(g)         float64
FA_Mono_(g)        float64
FA_Poly_(g)        float64
Cholestrl_(mg)     float64
dtype: object
# head() with no argument returns the first five rows.
first_rows = food_info.head()
print(first_rows)
# Pass a row count to head() to get a different number of leading rows.
print(food_info.head(3))
# Column labels, followed by the (rows, columns) shape tuple.
print(food_info.columns)
print(food_info.shape)
   NDB_No                 Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  \
0    1001          BUTTER WITH SALT      15.87         717         0.85   
1    1002  BUTTER WHIPPED WITH SALT      15.87         717         0.85   
2    1003      BUTTER OIL ANHYDROUS       0.24         876         0.28   
3    1004               CHEESE BLUE      42.41         353        21.40   
4    1005              CHEESE BRICK      41.11         371        23.24   

   Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  \
0          81.11     2.11            0.06           0.0           0.06   
1          81.11     2.11            0.06           0.0           0.06   
2          99.48     0.00            0.00           0.0           0.00   
3          28.74     5.11            2.34           0.0           0.50   
4          29.68     3.18            2.79           0.0           0.51   

        ...        Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  \
0       ...          2499.0      684.0        2.32        1.5      60.0   
1       ...          2499.0      684.0        2.32        1.5      60.0   
2       ...          3069.0      840.0        2.80        1.8      73.0   
3       ...           721.0      198.0        0.25        0.5      21.0   
4       ...          1080.0      292.0        0.26        0.5      22.0   

   Vit_K_(mcg)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
0          7.0      51.368       21.021        3.043           215.0  
1          7.0      50.489       23.426        3.012           219.0  
2          8.6      61.924       28.732        3.694           256.0  
3          2.4      18.669        7.778        0.800            75.0  
4          2.5      18.764        8.598        0.784            94.0  

[5 rows x 36 columns]
# pandas uses zero-indexing.
# Series object representing the row at index 0.
print(food_info.loc[0])

# Series object representing the seventh row (label 6).
food_info.loc[6]

# Will throw an error: "KeyError: 'the label [8620] is not in the [index]'"
#food_info.loc[8620]
# The object dtype is equivalent to a string in Python.
NDB_No                         1001
Shrt_Desc          BUTTER WITH SALT
Water_(g)                     15.87
Energ_Kcal                      717
Protein_(g)                    0.85
Lipid_Tot_(g)                 81.11
Ash_(g)                        2.11
Carbohydrt_(g)                 0.06
Fiber_TD_(g)                      0
Sugar_Tot_(g)                  0.06
Calcium_(mg)                     24
Iron_(mg)                      0.02
Magnesium_(mg)                    2
Phosphorus_(mg)                  24
Potassium_(mg)                   24
Sodium_(mg)                     643
Zinc_(mg)                      0.09
Copper_(mg)                       0
Manganese_(mg)                    0
Selenium_(mcg)                    1
Vit_C_(mg)                        0
Thiamin_(mg)                  0.005
Riboflavin_(mg)               0.034
Niacin_(mg)                   0.042
Vit_B6_(mg)                   0.003
Vit_B12_(mcg)                  0.17
Vit_A_IU                       2499
Vit_A_RAE                       684
Vit_E_(mg)                     2.32
Vit_D_mcg                       1.5
Vit_D_IU                         60
Vit_K_(mcg)                       7
FA_Sat_(g)                   51.368
FA_Mono_(g)                  21.021
FA_Poly_(g)                   3.043
Cholestrl_(mg)                  215
Name: 0, dtype: object





NDB_No                         1007
Shrt_Desc          CHEESE CAMEMBERT
Water_(g)                      51.8
Energ_Kcal                      300
Protein_(g)                    19.8
Lipid_Tot_(g)                 24.26
Ash_(g)                        3.68
Carbohydrt_(g)                 0.46
Fiber_TD_(g)                      0
Sugar_Tot_(g)                  0.46
Calcium_(mg)                    388
Iron_(mg)                      0.33
Magnesium_(mg)                   20
Phosphorus_(mg)                 347
Potassium_(mg)                  187
Sodium_(mg)                     842
Zinc_(mg)                      2.38
Copper_(mg)                   0.021
Manganese_(mg)                0.038
Selenium_(mcg)                 14.5
Vit_C_(mg)                        0
Thiamin_(mg)                  0.028
Riboflavin_(mg)               0.488
Niacin_(mg)                    0.63
Vit_B6_(mg)                   0.227
Vit_B12_(mcg)                   1.3
Vit_A_IU                        820
Vit_A_RAE                       241
Vit_E_(mg)                     0.21
Vit_D_mcg                       0.4
Vit_D_IU                         18
Vit_K_(mcg)                       2
FA_Sat_(g)                   15.259
FA_Mono_(g)                   7.023
FA_Poly_(g)                   0.724
Cholestrl_(mg)                   72
Name: 6, dtype: object
# dtype names pandas reports for a column:
#   object   - string values
#   int      - integer values
#   float    - float values
#   datetime - time values
#   bool     - Boolean values
# Print the dtype of every column in the table.
print(food_info.dtypes)
NDB_No               int64
Shrt_Desc           object
Water_(g)          float64
Energ_Kcal           int64
Protein_(g)        float64
Lipid_Tot_(g)      float64
Ash_(g)            float64
Carbohydrt_(g)     float64
Fiber_TD_(g)       float64
Sugar_Tot_(g)      float64
Calcium_(mg)       float64
Iron_(mg)          float64
Magnesium_(mg)     float64
Phosphorus_(mg)    float64
Potassium_(mg)     float64
Sodium_(mg)        float64
Zinc_(mg)          float64
Copper_(mg)        float64
Manganese_(mg)     float64
Selenium_(mcg)     float64
Vit_C_(mg)         float64
Thiamin_(mg)       float64
Riboflavin_(mg)    float64
Niacin_(mg)        float64
Vit_B6_(mg)        float64
Vit_B12_(mcg)      float64
Vit_A_IU           float64
Vit_A_RAE          float64
Vit_E_(mg)         float64
Vit_D_mcg          float64
Vit_D_IU           float64
Vit_K_(mcg)        float64
FA_Sat_(g)         float64
FA_Mono_(g)        float64
FA_Poly_(g)        float64
Cholestrl_(mg)     float64
dtype: object
# .loc slices by label and includes both endpoints, so this returns a
# DataFrame containing the rows at indexes 3, 4, 5, and 6.
food_info.loc[3:6]

# Returns a DataFrame containing the rows at indexes 2, 5, and 10. Either of the following approaches will work.
# Method 1: store the labels in a list first.
#two_five_ten = [2,5,10]
#food_info.loc[two_five_ten]

# Method 2: pass the list of labels directly.
food_info.loc[[2,5,10]]
    NDB_No             Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)
 2    1003  BUTTER OIL ANHYDROUS       0.24         876         0.28          99.48     0.00            0.00           0.0           0.00  ...    3069.0      840.0        2.80        1.8      73.0          8.6      61.924       28.732        3.694           256.0
 5    1006           CHEESE BRIE      48.42         334        20.75          27.68     2.70            0.45           0.0           0.45  ...     592.0      174.0        0.24        0.5      20.0          2.3      17.410        8.013        0.826           100.0
10    1011          CHEESE COLBY      38.20         394        23.76          32.11     3.36            2.57           0.0           0.52  ...     994.0      264.0        0.28        0.6      24.0          2.7      20.218        9.280        0.953            95.0

3 rows × 36 columns

# Series object representing the "NDB_No" column.
ndb_col = food_info["NDB_No"]
print(ndb_col)
# Alternatively, you can access a column by passing in a string variable.
col_name = "NDB_No"
ndb_col = food_info[col_name]
0        1001
1        1002
2        1003
3        1004
4        1005
5        1006
6        1007
7        1008
8        1009
9        1010
10       1011

Name: NDB_No, Length: 8618, dtype: int64
# Indexing with a list of column labels returns a DataFrame holding just
# those columns, in the order listed.
columns = ["Zinc_(mg)", "Copper_(mg)"]
zinc_copper = food_info[columns]
print(zinc_copper)
# Skipping the intermediate list variable works too:
#zinc_copper = food_info[["Zinc_(mg)", "Copper_(mg)"]]
      Zinc_(mg)  Copper_(mg)
0          0.09        0.000
1          0.05        0.016
2          0.01        0.001
3          2.66        0.040
4          2.60        0.024
5          2.38        0.019
6          2.38        0.021
7          2.94        0.024
8          3.43        0.056
9          2.79        0.042
10         3.07        0.042


[8618 rows x 2 columns]
# Display every column label in the table.
print(food_info.columns)
col_names = food_info.columns.tolist()
# Keep only the labels that end in "(g)".
gram_columns = [label for label in col_names if label.endswith("(g)")]
# Subset the table to those columns and preview the first three rows.
gram_df = food_info[gram_columns]
print(gram_df.head(3))
Index([u'NDB_No', u'Shrt_Desc', u'Water_(g)', u'Energ_Kcal', u'Protein_(g)',
       u'Lipid_Tot_(g)', u'Ash_(g)', u'Carbohydrt_(g)', u'Fiber_TD_(g)',
       u'Sugar_Tot_(g)', u'Calcium_(mg)', u'Iron_(mg)', u'Magnesium_(mg)',
       u'Phosphorus_(mg)', u'Potassium_(mg)', u'Sodium_(mg)', u'Zinc_(mg)',
       u'Copper_(mg)', u'Manganese_(mg)', u'Selenium_(mcg)', u'Vit_C_(mg)',
       u'Thiamin_(mg)', u'Riboflavin_(mg)', u'Niacin_(mg)', u'Vit_B6_(mg)',
       u'Vit_B12_(mcg)', u'Vit_A_IU', u'Vit_A_RAE', u'Vit_E_(mg)',
       u'Vit_D_mcg', u'Vit_D_IU', u'Vit_K_(mcg)', u'FA_Sat_(g)',
       u'FA_Mono_(g)', u'FA_Poly_(g)', u'Cholestrl_(mg)'],
      dtype='object')
   Water_(g)  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  \
0      15.87         0.85          81.11     2.11            0.06   
1      15.87         0.85          81.11     2.11            0.06   
2       0.24         0.28          99.48     0.00            0.00   

   Fiber_TD_(g)  Sugar_Tot_(g)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  
0           0.0           0.06      51.368       21.021        3.043  
1           0.0           0.06      50.489       23.426        3.012  
2           0.0           0.00      61.924       28.732        3.694  
import pandas
# Reload the table so this snippet does not rely on earlier ones.
food_info = pandas.read_csv("food_info.csv")
col_names = food_info.columns.tolist()
# Labels ending in "(g)".
gram_columns = [c for c in col_names if c.endswith("(g)")]
# Dividing a DataFrame by a scalar divides every cell; NaN cells stay NaN.
print(food_info[gram_columns] / 10)
      Water_(g)  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  \
0         1.587        0.085          8.111    0.211           0.006   
1         1.587        0.085          8.111    0.211           0.006   
2         0.024        0.028          9.948    0.000           0.000   
3         4.241        2.140          2.874    0.511           0.234   
4         4.111        2.324          2.968    0.318           0.279   
5         4.842        2.075          2.768    0.270           0.045   
6         5.180        1.980          2.426    0.368           0.046   
7         3.928        2.518          2.920    0.328           0.306   
8         3.710        2.404          3.382    0.371           0.133   
9         3.765        2.337          3.060    0.360           0.478   
10        3.820        2.376          3.211    0.336           0.257   


      Fiber_TD_(g)  Sugar_Tot_(g)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  
0             0.00          0.006      5.1368       2.1021       0.3043  
1             0.00          0.006      5.0489       2.3426       0.3012  
2             0.00          0.000      6.1924       2.8732       0.3694  
3             0.00          0.050      1.8669       0.7778       0.0800  
4             0.00          0.051      1.8764       0.8598       0.0784  
5             0.00          0.045      1.7410       0.8013       0.0826  
6             0.00          0.046      1.5259       0.7023       0.0724  
7             0.00            NaN      1.8584       0.8275       0.0830  
8             0.00          0.028      1.9368       0.8428       0.1433  
9             0.00            NaN      1.9475       0.8671       0.0870  
10            0.00          0.052      2.0218       0.9280       0.0953  
# Read titanic_train.csv and pull out the "Age" column as a Series.
titanic_train = pandas.read_csv("titanic_train.csv")
age = titanic_train["Age"]
# Boolean mask: True where the Age value is missing.
age_is_null = pandas.isnull(age)
# Indexing with the mask keeps only the missing entries; count them.
numb = age[age_is_null]
print(len(numb))
177
# Keep only the rows whose Age is present (~ negates the boolean mask).
good_ages = titanic_train["Age"][~age_is_null]
# Mean computed by hand over the non-missing ages.
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)
29.6991176471
# Built-in equivalent of the manual sum/len computation above.
good_ages.mean()
29.69911764705882
# Group rows by "Pclass" and aggregate "Age" within each group; with no
# aggfunc given, pivot_table uses the mean.
titanic_train.pivot_table(index="Pclass",values="Age")
              Age
Pclass
1       38.233441
2       29.877630
3       25.140620
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值