数据结构Series
import numpy as np
import pandas as pd
构造和初始化Series
s = pd. Series( [ 7 , 'Beijing' , 3.14 , - 12345 , 'Hanxiao' ] )
s[ 1 ]
'Beijing'
s
0 7
1 Beijing
2 3.14
3 -12345
4 Hanxiao
dtype: object
pandas默认用0到n-1(n为元素个数)作为Series的index,也可以自定义index(索引)。
s = pd. Series( [ 7 , 'Beijing' , 3.14 , - 12345 , 'Hanxiao' ] , index= [ 'A' , 'B' , 'C' , 'D' , 'E' ] )
s
A 7
B Beijing
C 3.14
D -12345
E Hanxiao
dtype: object
s[ 'A' ]
7
s[ [ 'A' , 'D' , 'B' ] ]
A 7
D -12345
B Beijing
dtype: object
可以用list构建Series,同时可以指定index。也可以用dict来初始化Series。
cities = { 'Beijing' : 55000 , 'Shanghai' : 60000 , 'Shenzhen' : 50000 , 'Hangzhou' : 30000 , 'Guangzhou' : 40000 , 'Suzhou' : None }
cities
{'Beijing': 55000,
'Shanghai': 60000,
'Shenzhen': 50000,
'Hangzhou': 30000,
'Guangzhou': 40000,
'Suzhou': None}
apt = pd. Series( cities, name= 'income' )
apt
Beijing 55000.0
Shanghai 60000.0
Shenzhen 50000.0
Hangzhou 30000.0
Guangzhou 40000.0
Suzhou NaN
Name: income, dtype: float64
apt[ 'Guangzhou' ]
40000.0
apt[ 4 ]
40000.0
apt[ 1 : ]
Shanghai 60000.0
Shenzhen 50000.0
Hangzhou 30000.0
Guangzhou 40000.0
Suzhou NaN
Name: income, dtype: float64
apt[ : - 1 ]
Beijing 55000.0
Shanghai 60000.0
Shenzhen 50000.0
Hangzhou 30000.0
Guangzhou 40000.0
Name: income, dtype: float64
apt[ [ 3 , 4 , 1 ] ]
Hangzhou 30000.0
Guangzhou 40000.0
Shanghai 60000.0
Name: income, dtype: float64
apt[ [ 'Shanghai' , 'Shenzhen' ] ]
Shanghai 60000.0
Shenzhen 50000.0
Name: income, dtype: float64
3 * apt
Beijing 165000.0
Shanghai 180000.0
Shenzhen 150000.0
Hangzhou 90000.0
Guangzhou 120000.0
Suzhou NaN
Name: income, dtype: float64
apt/ 2.5
Beijing 22000.0
Shanghai 24000.0
Shenzhen 20000.0
Hangzhou 12000.0
Guangzhou 16000.0
Suzhou NaN
Name: income, dtype: float64
apt[ 1 : ] + apt[ : - 1 ]
Beijing NaN
Guangzhou 80000.0
Hangzhou 60000.0
Shanghai 120000.0
Shenzhen 100000.0
Suzhou NaN
Name: income, dtype: float64
'Hangzhou' in apt
True
'Chongqing' in apt
False
print ( apt. get( 'Chongqing' ) )
None
print ( apt. get( 'Guangzhou' ) )
40000.0
boolean indexing/条件判断索引
apt>= 40000
Beijing True
Shanghai True
Shenzhen True
Hangzhou False
Guangzhou True
Suzhou False
Name: income, dtype: bool
apt[ apt>= 40000 ]
Beijing 55000.0
Shanghai 60000.0
Shenzhen 50000.0
Guangzhou 40000.0
Name: income, dtype: float64
apt. mean( )
47000.0
apt. median( )
50000.0
apt. max ( )
60000.0
apt. min ( )
30000.0
Series赋值
apt
Beijing 55000.0
Shanghai 60000.0
Shenzhen 50000.0
Hangzhou 30000.0
Guangzhou 40000.0
Suzhou NaN
Name: income, dtype: float64
apt[ 'Shenzhen' ] = 70000
apt
Beijing 55000.0
Shanghai 60000.0
Shenzhen 70000.0
Hangzhou 30000.0
Guangzhou 40000.0
Suzhou NaN
Name: income, dtype: float64
apt[ apt<= 40000 ] = 45000
apt
Beijing 55000.0
Shanghai 60000.0
Shenzhen 70000.0
Hangzhou 45000.0
Guangzhou 45000.0
Suzhou NaN
Name: income, dtype: float64
np. log( apt)
Beijing 10.915088
Shanghai 11.002100
Shenzhen 11.156251
Hangzhou 10.714418
Guangzhou 10.714418
Suzhou NaN
Name: income, dtype: float64
cars = pd. Series( { 'Beijing' : 350000 , 'Shanghai' : 400000 , 'Shenzhen' : 300000 , 'Tianjing' : 200000 , 'Guangzhou' : 250000 , 'Chongqing' : 150000 } )
cars
Beijing 350000
Shanghai 400000
Shenzhen 300000
Tianjing 200000
Guangzhou 250000
Chongqing 150000
dtype: int64
expense = cars + 10 * apt
expense
Beijing 900000.0
Chongqing NaN
Guangzhou 700000.0
Hangzhou NaN
Shanghai 1000000.0
Shenzhen 1000000.0
Suzhou NaN
Tianjing NaN
dtype: float64
缺失数据处理
apt
Beijing 55000.0
Shanghai 60000.0
Shenzhen 70000.0
Hangzhou 45000.0
Guangzhou 45000.0
Suzhou NaN
Name: income, dtype: float64
apt. notnull( )
Beijing True
Shanghai True
Shenzhen True
Hangzhou True
Guangzhou True
Suzhou False
Name: income, dtype: bool
apt. isnull( )
Beijing False
Shanghai False
Shenzhen False
Hangzhou False
Guangzhou False
Suzhou True
Name: income, dtype: bool
expense
Beijing 900000.0
Chongqing NaN
Guangzhou 700000.0
Hangzhou NaN
Shanghai 1000000.0
Shenzhen 1000000.0
Suzhou NaN
Tianjing NaN
dtype: float64
expense[ expense. isnull( ) ] = expense. mean( )
expense
Beijing 900000.0
Chongqing 900000.0
Guangzhou 700000.0
Hangzhou 900000.0
Shanghai 1000000.0
Shenzhen 1000000.0
Suzhou 900000.0
Tianjing 900000.0
dtype: float64
数据结构DataFrame
二维数组
data = { 'City' : [ 'Beijing' , 'Shanghai' , 'Guangzhou' , 'Shenzhen' , 'Hanghzhou' , 'Chongqing' ] ,
'year' : [ 2017 , 2018 , 2017 , 2018 , 2017 , 2017 ] ,
'popu' : [ 2100 , 2300 , 1000 , 700 , 500 , 500 ]
}
pd. DataFrame( data)
City year popu 0 Beijing 2017 2100 1 Shanghai 2018 2300 2 Guangzhou 2017 1000 3 Shenzhen 2018 700 4 Hanghzhou 2017 500 5 Chongqing 2017 500
pd. DataFrame( data, columns= [ 'year' , 'City' , 'popu' ] )
year City popu 0 2017 Beijing 2100 1 2018 Shanghai 2300 2 2017 Guangzhou 1000 3 2018 Shenzhen 700 4 2017 Hanghzhou 500 5 2017 Chongqing 500
pd. DataFrame( data, columns= [ 'year' , 'City' , 'popu' ] , index= [ 'one' , 'two' , 'three' , 'four' , 'five' , 'six' ] )
year City popu one 2017 Beijing 2100 two 2018 Shanghai 2300 three 2017 Guangzhou 1000 four 2018 Shenzhen 700 five 2017 Hanghzhou 500 six 2017 Chongqing 500
apt
Beijing 55000.0
Shanghai 60000.0
Shenzhen 70000.0
Hangzhou 45000.0
Guangzhou 45000.0
Suzhou NaN
Name: income, dtype: float64
cars
Beijing 350000
Shanghai 400000
Shenzhen 300000
Tianjing 200000
Guangzhou 250000
Chongqing 150000
dtype: int64
df = pd. DataFrame( { 'apt' : apt, 'cars' : cars} )
df
apt cars Beijing 55000.0 350000.0 Chongqing NaN 150000.0 Guangzhou 45000.0 250000.0 Hangzhou 45000.0 NaN Shanghai 60000.0 400000.0 Shenzhen 70000.0 300000.0 Suzhou NaN NaN Tianjing NaN 200000.0
df[ 'apt' ]
Beijing 55000.0
Chongqing NaN
Guangzhou 45000.0
Hangzhou 45000.0
Shanghai 60000.0
Shenzhen 70000.0
Suzhou NaN
Tianjing NaN
Name: apt, dtype: float64
type ( df[ 'apt' ] )
pandas.core.series.Series
df[ [ 'apt' ] ]
apt Beijing 55000.0 Chongqing NaN Guangzhou 45000.0 Hangzhou 45000.0 Shanghai 60000.0 Shenzhen 70000.0 Suzhou NaN Tianjing NaN
type ( df[ [ 'apt' ] ] )
pandas.core.frame.DataFrame
df
apt cars Beijing 55000.0 350000.0 Chongqing NaN 150000.0 Guangzhou 45000.0 250000.0 Hangzhou 45000.0 NaN Shanghai 60000.0 400000.0 Shenzhen 70000.0 300000.0 Suzhou NaN NaN Tianjing NaN 200000.0
df[ 'bonus' ] = 40000
df
apt cars bonus Beijing 55000.0 350000.0 40000 Chongqing NaN 150000.0 40000 Guangzhou 45000.0 250000.0 40000 Hangzhou 45000.0 NaN 40000 Shanghai 60000.0 400000.0 40000 Shenzhen 70000.0 300000.0 40000 Suzhou NaN NaN 40000 Tianjing NaN 200000.0 40000
df[ 'expense' ] = df[ 'apt' ] + df[ 'bonus' ]
df
apt cars bonus expense Beijing 55000.0 350000.0 40000 95000.0 Chongqing NaN 150000.0 40000 NaN Guangzhou 45000.0 250000.0 40000 85000.0 Hangzhou 45000.0 NaN 40000 85000.0 Shanghai 60000.0 400000.0 40000 100000.0 Shenzhen 70000.0 300000.0 40000 110000.0 Suzhou NaN NaN 40000 NaN Tianjing NaN 200000.0 40000 NaN
df. index
Index(['Beijing', 'Chongqing', 'Guangzhou', 'Hangzhou', 'Shanghai', 'Shenzhen',
'Suzhou', 'Tianjing'],
dtype='object')
df. loc[ 'Beijing' ]
apt 55000.0
cars 350000.0
bonus 40000.0
expense 95000.0
Name: Beijing, dtype: float64
type ( df. loc[ 'Beijing' ] )
pandas.core.series.Series
df. loc[ [ 'Beijing' , 'Shanghai' , 'Guangzhou' ] ]
apt cars bonus expense Beijing 55000.0 350000.0 40000 95000.0 Shanghai 60000.0 400000.0 40000 100000.0 Guangzhou 45000.0 250000.0 40000 85000.0
df
apt cars bonus expense Beijing 55000.0 350000.0 40000 95000.0 Chongqing NaN 150000.0 40000 NaN Guangzhou 45000.0 250000.0 40000 85000.0 Hangzhou 45000.0 NaN 40000 85000.0 Shanghai 60000.0 400000.0 40000 100000.0 Shenzhen 70000.0 300000.0 40000 110000.0 Suzhou NaN NaN 40000 NaN Tianjing NaN 200000.0 40000 NaN
df. loc[ 'Beijing' : 'Suzhou' , [ 'apt' , 'bonus' ] ]
apt bonus Beijing 55000.0 40000 Chongqing NaN 40000 Guangzhou 45000.0 40000 Hangzhou 45000.0 40000 Shanghai 60000.0 40000 Shenzhen 70000.0 40000 Suzhou NaN 40000
df. loc[ 'Beijing' : 'Suzhou' , 'apt' : 'bonus' ]
apt cars bonus Beijing 55000.0 350000.0 40000 Chongqing NaN 150000.0 40000 Guangzhou 45000.0 250000.0 40000 Hangzhou 45000.0 NaN 40000 Shanghai 60000.0 400000.0 40000 Shenzhen 70000.0 300000.0 40000 Suzhou NaN NaN 40000
df. loc[ [ 'Beijing' , 'Suzhou' ] , [ 'apt' , 'bonus' ] ]
apt bonus Beijing 55000.0 40000 Suzhou NaN 40000
df. loc[ 'Beijing' , 'bonus' ] = 50000
df
apt cars bonus expense Beijing 55000.0 350000.0 50000 95000.0 Chongqing NaN 150000.0 40000 NaN Guangzhou 45000.0 250000.0 40000 85000.0 Hangzhou 45000.0 NaN 40000 85000.0 Shanghai 60000.0 400000.0 40000 100000.0 Shenzhen 70000.0 300000.0 40000 110000.0 Suzhou NaN NaN 40000 NaN Tianjing NaN 200000.0 40000 NaN
df. loc[ : , 'expense' ] = 100000
df
apt cars bonus expense Beijing 55000.0 350000.0 50000 100000 Chongqing NaN 150000.0 40000 100000 Guangzhou 45000.0 250000.0 40000 100000 Hangzhou 45000.0 NaN 40000 100000 Shanghai 60000.0 400000.0 40000 100000 Shenzhen 70000.0 300000.0 40000 100000 Suzhou NaN NaN 40000 100000 Tianjing NaN 200000.0 40000 100000
df. shape
(8, 4)
df. info( )
<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, Beijing to Tianjing
Data columns (total 4 columns):
apt 5 non-null float64
cars 6 non-null float64
bonus 8 non-null int64
expense 8 non-null int64
dtypes: float64(2), int64(2)
memory usage: 640.0+ bytes
df. T
Beijing Chongqing Guangzhou Hangzhou Shanghai Shenzhen Suzhou Tianjing apt 55000.0 NaN 45000.0 45000.0 60000.0 70000.0 NaN NaN cars 350000.0 150000.0 250000.0 NaN 400000.0 300000.0 NaN 200000.0 bonus 50000.0 40000.0 40000.0 40000.0 40000.0 40000.0 40000.0 40000.0 expense 100000.0 100000.0 100000.0 100000.0 100000.0 100000.0 100000.0 100000.0
df
apt cars bonus expense Beijing 55000.0 350000.0 50000 100000 Chongqing NaN 150000.0 40000 100000 Guangzhou 45000.0 250000.0 40000 100000 Hangzhou 45000.0 NaN 40000 100000 Shanghai 60000.0 400000.0 40000 100000 Shenzhen 70000.0 300000.0 40000 100000 Suzhou NaN NaN 40000 100000 Tianjing NaN 200000.0 40000 100000
df. describe( )
apt cars bonus expense count 5.000000 6.000000 8.000000 8.0 mean 55000.000000 275000.000000 41250.000000 100000.0 std 10606.601718 93541.434669 3535.533906 0.0 min 45000.000000 150000.000000 40000.000000 100000.0 25% 45000.000000 212500.000000 40000.000000 100000.0 50% 55000.000000 275000.000000 40000.000000 100000.0 75% 60000.000000 337500.000000 40000.000000 100000.0 max 70000.000000 400000.000000 50000.000000 100000.0
df[ 'cars' ]
Beijing 350000.0
Chongqing 150000.0
Guangzhou 250000.0
Hangzhou NaN
Shanghai 400000.0
Shenzhen 300000.0
Suzhou NaN
Tianjing 200000.0
Name: cars, dtype: float64
df[ 'cars' ] < 310000
Beijing False
Chongqing True
Guangzhou True
Hangzhou False
Shanghai False
Shenzhen True
Suzhou False
Tianjing True
Name: cars, dtype: bool
df. loc[ : , 'color' ] = [ '红' , '黄' , '紫' , '蓝' , '红' , '绿' , '棕' , '红' ]
df
apt cars bonus expense color Beijing 55000.0 350000.0 50000 100000 红 Chongqing NaN 150000.0 40000 100000 黄 Guangzhou 45000.0 250000.0 40000 100000 紫 Hangzhou 45000.0 NaN 40000 100000 蓝 Shanghai 60000.0 400000.0 40000 100000 红 Shenzhen 70000.0 300000.0 40000 100000 绿 Suzhou NaN NaN 40000 100000 棕 Tianjing NaN 200000.0 40000 100000 红
df[ 'color' ] . isin( [ '红' , '绿' ] )
Beijing True
Chongqing False
Guangzhou False
Hangzhou False
Shanghai True
Shenzhen True
Suzhou False
Tianjing True
Name: color, dtype: bool
df[ df[ 'color' ] . isin( [ '红' , '绿' ] ) ]
apt cars bonus expense color Beijing 55000.0 350000.0 50000 100000 红 Shanghai 60000.0 400000.0 40000 100000 红 Shenzhen 70000.0 300000.0 40000 100000 绿 Tianjing NaN 200000.0 40000 100000 红
df. fillna( value= 50000 )
apt cars bonus expense color Beijing 55000.0 350000.0 50000 100000 红 Chongqing 50000.0 150000.0 40000 100000 黄 Guangzhou 45000.0 250000.0 40000 100000 紫 Hangzhou 45000.0 50000.0 40000 100000 蓝 Shanghai 60000.0 400000.0 40000 100000 红 Shenzhen 70000.0 300000.0 40000 100000 绿 Suzhou 50000.0 50000.0 40000 100000 棕 Tianjing 50000.0 200000.0 40000 100000 红
df
apt cars bonus expense color Beijing 55000.0 350000.0 50000 100000 红 Chongqing NaN 150000.0 40000 100000 黄 Guangzhou 45000.0 250000.0 40000 100000 紫 Hangzhou 45000.0 NaN 40000 100000 蓝 Shanghai 60000.0 400000.0 40000 100000 红 Shenzhen 70000.0 300000.0 40000 100000 绿 Suzhou NaN NaN 40000 100000 棕 Tianjing NaN 200000.0 40000 100000 红
df. fillna( method= 'ffill' )
apt cars bonus expense color Beijing 55000.0 350000.0 50000 100000 红 Chongqing 55000.0 150000.0 40000 100000 黄 Guangzhou 45000.0 250000.0 40000 100000 紫 Hangzhou 45000.0 250000.0 40000 100000 蓝 Shanghai 60000.0 400000.0 40000 100000 红 Shenzhen 70000.0 300000.0 40000 100000 绿 Suzhou 70000.0 300000.0 40000 100000 棕 Tianjing 70000.0 200000.0 40000 100000 红
df. fillna( method= 'bfill' )
apt cars bonus expense color Beijing 55000.0 350000.0 50000 100000 红 Chongqing 45000.0 150000.0 40000 100000 黄 Guangzhou 45000.0 250000.0 40000 100000 紫 Hangzhou 45000.0 400000.0 40000 100000 蓝 Shanghai 60000.0 400000.0 40000 100000 红 Shenzhen 70000.0 300000.0 40000 100000 绿 Suzhou NaN 200000.0 40000 100000 棕 Tianjing NaN 200000.0 40000 100000 红
从外部读入文件
stock = pd. read_csv( 'stock_px.csv' )
stock. head( )
DATE AA AAPL GE IBM JNJ MSFT PEP SPX XOM 0 1990-2-1 0:00 4.98 7.86 2.87 16.79 4.27 0.51 6.04 328.79 6.12 1 1990-2-2 0:00 5.04 8.00 2.87 16.89 4.37 0.51 6.09 330.92 6.24 2 1990-2-5 0:00 5.07 8.18 2.87 17.32 4.34 0.51 6.05 331.85 6.25 3 1990-2-6 0:00 5.01 8.12 2.88 17.56 4.32 0.51 6.15 329.66 6.23 4 1990-2-7 0:00 5.04 7.77 2.91 17.93 4.38 0.51 6.17 333.75 6.33
stock = pd. read_csv( 'stock_px.csv' , index_col= 0 , parse_dates= [ 'DATE' ] )
stock. head( 6 )
AA AAPL GE IBM JNJ MSFT PEP SPX XOM DATE 1990-02-01 4.98 7.86 2.87 16.79 4.27 0.51 6.04 328.79 6.12 1990-02-02 5.04 8.00 2.87 16.89 4.37 0.51 6.09 330.92 6.24 1990-02-05 5.07 8.18 2.87 17.32 4.34 0.51 6.05 331.85 6.25 1990-02-06 5.01 8.12 2.88 17.56 4.32 0.51 6.15 329.66 6.23 1990-02-07 5.04 7.77 2.91 17.93 4.38 0.51 6.17 333.75 6.33 1990-02-08 5.04 7.71 2.92 17.86 4.46 0.51 6.22 332.96 6.35
stock. info( )
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5472 entries, 1990-02-01 to 2011-10-14
Data columns (total 9 columns):
AA 5472 non-null float64
AAPL 5472 non-null float64
GE 5472 non-null float64
IBM 5472 non-null float64
JNJ 5472 non-null float64
MSFT 5472 non-null float64
PEP 5471 non-null float64
SPX 5472 non-null float64
XOM 5472 non-null float64
dtypes: float64(9)
memory usage: 427.5 KB
stock. describe( )
AA AAPL GE IBM JNJ MSFT PEP SPX XOM count 5472.000000 5472.000000 5472.000000 5472.000000 5472.000000 5472.000000 5471.000000 5472.000000 5472.000000 mean 17.440285 57.119313 17.933194 66.637730 34.225340 17.046345 34.284204 945.035216 35.225919 std 9.647999 88.670423 10.647635 41.689481 19.726666 11.000988 18.383894 369.494672 23.967647 min 4.200000 3.230000 2.400000 8.400000 4.200000 0.510000 5.870000 295.460000 5.940000 25% 8.077500 8.760000 6.280000 20.577500 12.160000 4.240000 15.480000 547.217500 11.910000 50% 14.885000 11.990000 18.150000 74.115000 37.570000 20.910000 33.750000 1058.305000 30.280000 75% 26.340000 68.017500 27.300000 95.657500 52.675000 25.310000 49.530000 1253.395000 55.300000 max 43.620000 422.000000 42.780000 190.530000 67.320000 46.810000 71.250000 1565.150000 87.480000
stock. tail( )
AA AAPL GE IBM JNJ MSFT PEP SPX XOM DATE 2011-10-10 10.09 388.81 16.14 186.62 64.43 26.94 61.87 1194.89 76.28 2011-10-11 10.30 400.29 16.14 185.00 63.96 27.00 60.95 1195.54 76.27 2011-10-12 10.05 402.19 16.40 186.12 64.33 26.96 62.70 1207.25 77.16 2011-10-13 10.10 408.43 16.22 186.82 64.23 27.18 62.36 1203.66 76.37 2011-10-14 10.26 422.00 16.60 190.53 64.72 27.27 62.24 1224.58 78.11
stock. index
DatetimeIndex(['1990-02-01', '1990-02-02', '1990-02-05', '1990-02-06',
'1990-02-07', '1990-02-08', '1990-02-09', '1990-02-12',
'1990-02-13', '1990-02-14',
...
'2011-10-03', '2011-10-04', '2011-10-05', '2011-10-06',
'2011-10-07', '2011-10-10', '2011-10-11', '2011-10-12',
'2011-10-13', '2011-10-14'],
dtype='datetime64[ns]', name='DATE', length=5472, freq=None)
stock. loc[ : , 'dow' ] = stock. index. dayofweek
stock. head( )
AA AAPL GE IBM JNJ MSFT PEP SPX XOM dow DATE 1990-02-01 4.98 7.86 2.87 16.79 4.27 0.51 6.04 328.79 6.12 3 1990-02-02 5.04 8.00 2.87 16.89 4.37 0.51 6.09 330.92 6.24 4 1990-02-05 5.07 8.18 2.87 17.32 4.34 0.51 6.05 331.85 6.25 0 1990-02-06 5.01 8.12 2.88 17.56 4.32 0.51 6.15 329.66 6.23 1 1990-02-07 5.04 7.77 2.91 17.93 4.38 0.51 6.17 333.75 6.33 2
stock. loc[ : , 'doy' ] = stock. index. dayofyear
stock. head( )
AA AAPL GE IBM JNJ MSFT PEP SPX XOM dow doy DATE 1990-02-01 4.98 7.86 2.87 16.79 4.27 0.51 6.04 328.79 6.12 3 32 1990-02-02 5.04 8.00 2.87 16.89 4.37 0.51 6.09 330.92 6.24 4 33 1990-02-05 5.07 8.18 2.87 17.32 4.34 0.51 6.05 331.85 6.25 0 36 1990-02-06 5.01 8.12 2.88 17.56 4.32 0.51 6.15 329.66 6.23 1 37 1990-02-07 5.04 7.77 2.91 17.93 4.38 0.51 6.17 333.75 6.33 2 38
% matplotlib inline
stock[ 'JNJ' ] . plot( )
<matplotlib.axes._subplots.AxesSubplot at 0x19bfb184788>
stock[ 'JNJ' ] . plot( grid= True )
<matplotlib.axes._subplots.AxesSubplot at 0x19bfc4b9188>
sto = stock[ ( stock. index >= '2009-01-01' ) & ( stock. index <= '2009-1-31' ) ]
sto[ 'JNJ' ] . plot( kind= 'bar' )
<matplotlib.axes._subplots.AxesSubplot at 0x19bfc7eaa48>
stock[ ( stock. index >= '2009-01-01' ) & ( stock. index < '2009-1-31' ) ] . describe( )
AA AAPL GE IBM JNJ MSFT PEP SPX XOM dow doy count 20.000000 20.000000 20.000000 20.00000 20.00000 20.000000 20.000000 20.000000 20.000000 20.00000 20.000000 mean 9.187000 88.775000 12.947500 83.65950 52.89300 17.768000 47.889500 865.575500 73.170500 2.20000 16.650000 std 1.364208 4.377831 1.532359 3.34427 1.16083 1.135261 1.694649 38.874233 1.590811 1.43637 8.851256 min 7.450000 78.200000 10.860000 77.87000 50.94000 16.060000 46.150000 805.220000 70.230000 0.00000 2.000000 25% 8.057500 87.115000 11.747500 80.96000 52.15000 16.582500 46.490000 839.322500 71.930000 1.00000 8.750000 50% 8.805000 89.885000 12.525000 83.19000 52.89000 18.140000 47.345000 847.915000 73.100000 2.00000 15.500000 75% 10.360000 91.432500 14.475000 86.87250 53.71750 18.545000 48.457500 894.425000 73.997500 3.25000 23.750000 max 11.600000 94.580000 15.420000 90.06000 55.20000 19.500000 51.420000 934.700000 76.350000 4.00000 30.000000
sto. to_csv( 'sto.csv' )
sto. to_csv( 'n_sto.csv' , index= False )