task3

from pandas import Series,DataFrame
import pandas as pd
import numpy as np
obj=pd.Series([4,7,-5,3])
obj
0    4
1    7
2   -5
3    3
dtype: int64
#Series 是一种类似于一维数组的对象,它由一组数据(各种 NumPy 数据类型)以及一组与之相关的数据标签(即索引)组成
#因此可以用values和index来获取数据的表示形式和索引对象
print (type(obj.values))
print (type(obj.index))
<class 'numpy.ndarray'>
<class 'pandas.core.indexes.range.RangeIndex'>
obj2=pd.Series([4,7,-5,3],index=["b","c","v","f"])
obj2
b    4
c    7
v   -5
f    3
dtype: int64
obj2.index
Index(['b', 'c', 'v', 'f'], dtype='object')
print (obj2["v"])
print (obj2[["c","v","f"]])#索引值列表,即使包含的是字符串而不是整数

-5
c    7
v   -5
f    3
dtype: int64
obj2.index[3]
'f'
obj2*2
b     8
c    14
v   -10
f     6
dtype: int64
np.exp(obj2)#exp() 方法返回x的指数,e的x次方
b      54.598150
c    1096.633158
v       0.006738
f      20.085537
dtype: float64
"b"in obj2
True
"r"in obj2
False
sdata={"ohio":35000,"texas":71000,"oregon":16000,"utah":5000}
obj3=pd.Series(sdata)
obj3
ohio      35000
texas     71000
oregon    16000
utah       5000
dtype: int64
#NA:表示缺失值(Missing value),是“Not Available”的缩写,NaN:表示非数值,是“Not a Number”的缩写
#NULL:表示空值,即没有内容
states=["california","ohio","michegan","utah"]
obj4=pd.Series(sdata,index=states)
obj4#值找不到,其结果就是“NAN“,not a number,在pandas中,用于表示缺失值或NA值
california        NaN
ohio          35000.0
michegan          NaN
utah           5000.0
dtype: float64
#pandas 的 isnull 和 notnull 函数可用于检测缺失数据
pd.isnull(obj4)
california     True
ohio          False
michegan       True
utah          False
dtype: bool
pd.notnull(obj4)
california    False
ohio           True
michegan      False
utah           True
dtype: bool
#Series 也有类似的实例方法
obj4.isnull()
california     True
ohio          False
michegan       True
utah          False
dtype: bool
obj3
ohio      35000
texas     71000
oregon    16000
utah       5000
dtype: int64
obj4
california        NaN
ohio          35000.0
michegan          NaN
utah           5000.0
dtype: float64
obj3+obj4
california        NaN
michegan          NaN
ohio          70000.0
oregon            NaN
texas             NaN
utah          10000.0
dtype: float64
obj4.name="population"
obj4.index.name="state"
obj4
state
california        NaN
ohio          35000.0
michegan          NaN
utah           5000.0
Name: population, dtype: float64
#Series 的索引可以通过赋值的方式就地修改
obj.index=["bob","steve","jeff",'ryan']
obj
bob      4
steve    7
jeff    -5
ryan     3
dtype: int64
#DataFrame 是一个表格型的数据结构,它含有一组有序的列,每列可以是不同的值类型(数值、字符串、布尔值等)。
#DataFrame 既有行索引也有列索引,它可以被看做由 Series 组成的字典(共用同一个索引)。
#DataFrame 中的数据是以一个或多个二维块存放的(而不是列表、字典或别的一维数据结构)。
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada',
'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002, 2003],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)
frame
stateyearpop
0Ohio20001.5
1Ohio20011.7
2Ohio20023.6
3Nevada20012.4
4Nevada20022.9
5Nevada20033.2
print (frame.head())
    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
pd.DataFrame(data,columns=["year","state","pop"])
yearstatepop
02000Ohio1.5
12001Ohio1.7
22002Ohio3.6
32001Nevada2.4
42002Nevada2.9
52003Nevada3.2
frame2=pd.DataFrame(data,columns=["year",'state','pop','debt'],index=['one','two','three','four','five','six'])
print (frame2)
       year   state  pop debt
one    2000    Ohio  1.5  NaN
two    2001    Ohio  1.7  NaN
three  2002    Ohio  3.6  NaN
four   2001  Nevada  2.4  NaN
five   2002  Nevada  2.9  NaN
six    2003  Nevada  3.2  NaN
frame2.columns
Index(['year', 'state', 'pop', 'debt'], dtype='object')
frame2['state']#类似字典的方式
one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object
frame2.year#属性的方式
one      2000
two      2001
three    2002
four     2001
five     2002
six      2003
Name: year, dtype: int64
frame2.loc['three']
year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object
frame2['debt']=16.5
frame2
yearstatepopdebt
one2000Ohio1.516.5
two2001Ohio1.716.5
three2002Ohio3.616.5
four2001Nevada2.416.5
five2002Nevada2.916.5
six2003Nevada3.216.5
frame2['debt']=np.arange(6.)
frame2
yearstatepopdebt
one2000Ohio1.50.0
two2001Ohio1.71.0
three2002Ohio3.62.0
four2001Nevada2.43.0
five2002Nevada2.94.0
six2003Nevada3.25.0
val=pd.Series([-1.2,-1.5,3],index=['one','two','five'])
frame2['debt']=val
frame2
yearstatepopdebt
one2000Ohio1.5-1.2
two2001Ohio1.7-1.5
three2002Ohio3.6NaN
four2001Nevada2.4NaN
five2002Nevada2.93.0
six2003Nevada3.2NaN
frame2['eastern']=frame2.state=='Ohio'
frame2
yearstatepopdebteastern
one2000Ohio1.5-1.2True
two2001Ohio1.7-1.5True
three2002Ohio3.6NaNTrue
four2001Nevada2.4NaNFalse
five2002Nevada2.93.0False
six2003Nevada3.2NaNFalse
del frame2['eastern']
frame2.columns
Index(['year', 'state', 'pop', 'debt'], dtype='object')
pop={'Neveda':{2001:2.4,2002:2.9},
    'Ohio':{2000:1.5,2001:1.7,2002:3.6}}
#将嵌套字典传给 DataFrame 时,pandas 会将其解释为:外层字典的键作为列,内层键则作为行索引
frame3=pd.DataFrame(pop)
frame3
NevedaOhio
2000NaN1.5
20012.41.7
20022.93.6
frame3.T#转置
200020012002
NevedaNaN2.42.9
Ohio1.51.73.6
pd.DataFrame(pop,index=[2001,2002,2003])
NevedaOhio
20012.41.7
20022.93.6
2003NaNNaN
pdata={'Ohio':frame3['Ohio'][:-1],
      'Neveda':frame3['Neveda'][:3]}
pd.DataFrame(pdata)
OhioNeveda
20001.5NaN
20011.72.4
2002NaN2.9
frame3.index.name='year';frame3.columns.name='state'
frame3
stateNevedaOhio
year
2000NaN1.5
20012.41.7
20022.93.6
frame3.values
array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])
frame2.values
array([[2000, 'Ohio', 1.5, -1.2],
       [2001, 'Ohio', 1.7, -1.5],
       [2002, 'Ohio', 3.6, nan],
       [2001, 'Nevada', 2.4, nan],
       [2002, 'Nevada', 2.9, 3.0],
       [2003, 'Nevada', 3.2, nan]], dtype=object)
#索引对象
#pandas 的索引对象负责管理轴标签和其他元数据(比如轴名称等)。
#构建 Series 或 DataFrame 时,所用到的任何数组或其他序列的标签都会被转换成一个 Index:
obj=pd.Series(range(3),index=['a','b','c'])
index=obj.index
values=obj.values
obj
a    0
b    1
c    2
dtype: int64
index[1:]
Index(['b', 'c'], dtype='object')
index[1]="w"
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-116-287780bd69fa> in <module>
----> 1 index[1]="w"


/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in __setitem__(self, key, value)
   3936 
   3937     def __setitem__(self, key, value):
-> 3938         raise TypeError("Index does not support mutable operations")
   3939 
   3940     def __getitem__(self, key):


TypeError: Index does not support mutable operations
labels=pd.Index(np.arange(3))
labels
Int64Index([0, 1, 2], dtype='int64')
obj2=pd.Series([1.5,-2.5,0],index=labels)
obj2
0    1.5
1   -2.5
2    0.0
dtype: float64
obj2.index is labels
True
frame3
stateNevedaOhio
year
2000NaN1.5
20012.41.7
20022.93.6
frame3.columns
Index(['Neveda', 'Ohio'], dtype='object', name='state')
'Ohio' in frame3.columns
True
2003 in frame3.index
False
dup_labels=pd.Index(['foo','foo','bar','bar'])
dup_labels
Index(['foo', 'foo', 'bar', 'bar'], dtype='object')

基本功能
重新索引

obj=pd.Series([4.5,7.2,-5.3,3.6],index=['d','b','a','c'])
obj
d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64
obj2=obj.reindex(['a','b','c','d','e'])
obj2
a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64
obj3=pd.Series(['blue','purple','yellow'],index=[0,2,4])
obj3
0      blue
2    purple
4    yellow
dtype: object
obj3.reindex(range(6),method='ffill')#ffill向前填充
0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object
frame=pd.DataFrame(np.arange(9).reshape((3,3)),
                  index=['a','c','d'],
                  columns=['Ohio','Texas','California'])
frame
OhioTexasCalifornia
a012
c345
d678
frame2=frame.reindex(['a','b','c','d'])
frame2
OhioTexasCalifornia
a0.01.02.0
bNaNNaNNaN
c3.04.05.0
d6.07.08.0
states=['Texas','Utah','California']
frame.reindex(columns=states)
TexasUtahCalifornia
a1NaN2
c4NaN5
d7NaN8
obj=pd.Series(np.arange(5.),index=['a','b','c','d','e'])
obj
a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64
new_obj=obj.drop('c')
new_obj
a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64
obj.drop(['d','c'])
a    0.0
b    1.0
e    4.0
dtype: float64
data=pd.DataFrame(np.arange(16).reshape(4,4),
                 index=['ohio','Colorado','Utah','Newyork'],
                 columns=['one','two','three','four'])
data
onetwothreefour
ohio0123
Colorado4567
Utah891011
Newyork12131415
data.drop(['Colorado','ohio'])

onetwothreefour
Utah891011
Newyork12131415
data.drop('three',axis=1)
onetwofour
ohio013
Colorado457
Utah8911
Newyork121315
data.drop(['two','four'],axis=1)
onethree
ohio02
Colorado46
Utah810
Newyork1214
# Many methods such as drop, which change the size or shape of a Series or
# DataFrame, can modify the object in place instead of returning a new object.
# Be careful with inplace: it destroys all of the data that is dropped.
obj.drop('c',inplace=True)
obj
a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64
obj=pd.Series(np.arange(4.),index=['a','b','c','d'])
obj
a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64
obj['b']
1.0
# Select the second element by position. .iloc makes the positional intent
# explicit; an integer key in [] on a label-indexed Series is ambiguous and
# deprecated in recent pandas releases.
obj.iloc[1]
1.0
obj[2:4]
c    2.0
d    3.0
dtype: float64
# Select the elements at positions 1 and 3. .iloc is used because a list of
# integers in [] on a label-indexed Series relies on the deprecated
# positional fallback.
obj.iloc[[1,3]]
b    1.0
d    3.0
dtype: float64
obj[['b','a','d']]
b    1.0
a    0.0
d    3.0
dtype: float64
obj[obj<2]
a    0.0
b    1.0
dtype: float64
obj['b':'c']
b    1.0
c    2.0
dtype: float64
obj["b":'c']=5
obj
a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64
data=pd.DataFrame(np.arange(16).reshape(4,4),
                 index=['Ohio','Colorado','Utah','California'],
                 columns=['one','two','three','four'])
data
onetwothreefour
Ohio0123
Colorado4567
Utah891011
California12131415
data['two']
Ohio           1
Colorado       5
Utah           9
California    13
Name: two, dtype: int64
data[['three','one']]
threeone
Ohio20
Colorado64
Utah108
California1412
data['two']
Ohio           1
Colorado       5
Utah           9
California    13
Name: two, dtype: int64
data[data['three']>5]
onetwothreefour
Colorado4567
Utah891011
California12131415
data<5
onetwothreefour
OhioTrueTrueTrueTrue
ColoradoTrueFalseFalseFalse
UtahFalseFalseFalseFalse
CaliforniaFalseFalseFalseFalse
data[data<5]=0
data
onetwothreefour
Ohio0000
Colorado0567
Utah891011
California12131415
data.loc['Colorado',['one','two']]
one    0
two    5
Name: Colorado, dtype: int64
data.iloc[2,[3,0,1]]
four    11
one      8
two      9
Name: Utah, dtype: int64
data.iloc[2]
one       8
two       9
three    10
four     11
Name: Utah, dtype: int64
data.iloc[[1,2],[3,0,1]]
fouronetwo
Colorado705
Utah1189
data.loc[:'Utah','two']
Ohio        0
Colorado    5
Utah        9
Name: two, dtype: int64
data.iloc[:,:3][data.three>5]
onetwothree
Colorado056
Utah8910
California121314
ser=pd.Series(np.arange(3.))
ser
0    0.0
1    1.0
2    2.0
dtype: float64
ser2=pd.Series(np.arange(3.),index=['a','b','c'])
# Last element by position. With a non-integer index, [] with an integer falls
# back to positional lookup, which is deprecated in recent pandas; .iloc is
# always positional.
ser2.iloc[-1]
2.0
ser[:1]
0    0.0
dtype: float64
ser.loc[:1]
0    0.0
1    1.0
dtype: float64
ser.iloc[:1]
0    0.0
dtype: float64
s1=pd.Series([7.3,-2.5,3.4,1.5],index=['a','c','d','e'])
s2=pd.Series([-2.1,3.6,-1.5,4,3.1],index=['a','c','e','f','g'])
s1
a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64
s2
a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64
s1+s2
a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64
df1=pd.DataFrame(np.arange(9.).reshape(3,3),
                columns=list('bcd'),
                index=['Ohio','Texas','Colorato'])
df2=pd.DataFrame(np.arange(12.).reshape(4,3),
                columns=list('bde'),
                index=['Utah','Ohio','Texas','Oregon'])
df1
bcd
Ohio0.01.02.0
Texas3.04.05.0
Colorato6.07.08.0
df2
bde
Utah0.01.02.0
Ohio3.04.05.0
Texas6.07.08.0
Oregon9.010.011.0
df1+df2
bcde
ColoratoNaNNaNNaNNaN
Ohio3.0NaN6.0NaN
OregonNaNNaNNaNNaN
Texas9.0NaN12.0NaN
UtahNaNNaNNaNNaN
df1=pd.DataFrame({'A':[1,2]})
df2=pd.DataFrame({'B':[3,4]})
df1
A
01
12
df2
B
03
14
df1-df2
AB
0NaNNaN
1NaNNaN
df1=pd.DataFrame(np.arange(12.).reshape(3,4),
                columns=list('abcd'))
df2=pd.DataFrame(np.arange(20.).reshape(4,5),
                columns=list('abcde'))
df2.loc[3,'e']=np.nan
df1
abcd
00.01.02.03.0
14.05.06.07.0
28.09.010.011.0
df2
      a     b     c     d     e
0   0.0   1.0   2.0   3.0   4.0
1   5.0   6.0   7.0   8.0   9.0
2  10.0  11.0  12.0  13.0  14.0
3  15.0  16.0  17.0  18.0   NaN
df1+df2
      a     b     c     d   e
0   0.0   2.0   4.0   6.0 NaN
1   9.0  11.0  13.0  15.0 NaN
2  18.0  20.0  22.0  24.0 NaN
3   NaN   NaN   NaN   NaN NaN
df1.add(df2,fill_value=0)
      a     b     c     d     e
0   0.0   2.0   4.0   6.0   4.0
1   9.0  11.0  13.0  15.0   9.0
2  18.0  20.0  22.0  24.0  14.0
3  15.0  16.0  17.0  18.0   NaN
1/df1
       a         b         c         d
0    inf  1.000000  0.500000  0.333333
1  0.250  0.200000  0.166667  0.142857
2  0.125  0.111111  0.100000  0.090909
df1.rdiv(1)
       a         b         c         d
0    inf  1.000000  0.500000  0.333333
1  0.250  0.200000  0.166667  0.142857
2  0.125  0.111111  0.100000  0.090909
df1.reindex(columns=df2.columns,fill_value=0)
abcde
00.01.02.03.00
14.05.06.07.00
28.09.010.011.00
arr=np.arange(12.).reshape(3,4)
arr
array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])
arr[0]
array([0., 1., 2., 3.])
arr-arr[0]#广播
array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])
frame=pd.DataFrame(np.arange(12.).reshape(4,3),
                  columns=list('bde'),
                   index=['Utah','Ohio','Texas','Oregon'])
series=frame.iloc[0]

frame
bde
Utah0.01.02.0
Ohio3.04.05.0
Texas6.07.08.0
Oregon9.010.011.0
series
b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64
frame-series
bde
Utah0.00.00.0
Ohio3.03.03.0
Texas6.06.06.0
Oregon9.09.09.0
series2=pd.Series(range(3),index=['b','e','f'])
frame+series2
          b   d     e   f
Utah    0.0 NaN   3.0 NaN
Ohio    3.0 NaN   6.0 NaN
Texas   6.0 NaN   9.0 NaN
Oregon  9.0 NaN  12.0 NaN
series3=frame['d']
frame
          b     d     e
Utah    0.0   1.0   2.0
Ohio    3.0   4.0   5.0
Texas   6.0   7.0   8.0
Oregon  9.0  10.0  11.0
series3
Utah       1.0
Ohio       4.0
Texas      7.0
Oregon    10.0
Name: d, dtype: float64
# Subtract column 'd' from every column of frame, row by row.
# axis='index' aligns series3 on the row labels; without it the Series index
# (Utah, Ohio, ...) is matched against the column labels (b, d, e), no label
# overlaps, and every cell of the result is NaN.
frame.sub(series3, axis='index')
# Expected output:
#           b    d    e
# Utah   -1.0  0.0  1.0
# Ohio   -1.0  0.0  1.0
# Texas  -1.0  0.0  1.0
# Oregon -1.0  0.0  1.0
frame=pd.DataFrame(np.random.randn(4,3),
                  columns=list('bde'),
                  index=['Utah','Ohio','Texas','Oregon'])
frame
bde
Utah-0.6740381.4058831.123189
Ohio-0.9019231.277413-0.979557
Texas0.063148-2.0401450.155796
Oregon-1.667468-0.353109-0.616387
np.abs(frame)
bde
Utah0.6740381.4058831.123189
Ohio0.9019231.2774130.979557
Texas0.0631482.0401450.155796
Oregon1.6674680.3531090.616387
def f(x):
    """Return the spread of ``x``: its largest value minus its smallest value."""
    highest = x.max()
    lowest = x.min()
    return highest - lowest
frame.apply(f)
b    1.730616
d    3.446028
e    2.102747
dtype: float64
frame.apply(f,axis='columns')
Utah      2.079921
Ohio      2.256970
Texas     2.195941
Oregon    1.314358
dtype: float64
def f(x):
    """Return the minimum and maximum of ``x`` as a Series labelled "min" and "max"."""
    lowest = x.min()
    highest = x.max()
    return pd.Series([lowest, highest], index=["min", "max"])
frame.apply(f)
bde
min-1.667468-2.040145-0.979557
max0.0631481.4058831.123189
# Render a number as a string with two decimal places (printf-style formatting).
# NOTE(review): this rebinds the builtin name `format` for the rest of the
# session; the name is kept because later statements call `format`.
format=lambda x:'%.2f'%x
frame.applymap(format)
bde
Utah-1.750.130.25
Ohio1.77-0.760.57
Texas0.17-0.840.40
Oregon0.200.20-1.46
frame['e'].map(format)
Utah       0.25
Ohio       0.57
Texas      0.40
Oregon    -1.46
Name: e, dtype: object
obj=pd.Series(range(4),index=['d','a','b','c'])
obj.sort_index()
a    1
b    2
c    3
d    0
dtype: int64
frame=pd.DataFrame(np.arange(8).reshape(2,4),
                  index=['three','one'],
                  columns=['d','a','b','c'])
frame.sort_index()
dabc
one4567
three0123
frame.sort_index(axis=1)
abcd
three1230
one5674
frame.sort_index(axis=1,ascending=False)
dcba
three0321
one4765
obj=pd.Series([4,7,-3,2])
obj.sort_values()
2   -3
3    2
0    4
1    7
dtype: int64
obj=pd.Series([4,np.nan,7,np.nan,-3,2])
obj.sort_values()
4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64
frame=pd.DataFrame({'a':[4,7,-3,2],'b':[0,1,0,1]})
frame
frame.sort_values(by='b')
frame.sort_values(by=['a','b'])
obj=pd.Series([7,-5,7,4,2,0,4])
obj.rank()
0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64
obj.rank(method='first')
0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64
obj.rank(ascending=False,method='max')
0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64
frame = pd.DataFrame({'b': [4.3, 7, -3, 2], 'a': [0, 1, 0,1],'c': [-2, 5, 8, -2.5]})
frame

bac
04.30-2.0
17.015.0
2-3.008.0
32.01-2.5
frame.rank(axis='columns')
bac
03.02.01.0
13.01.02.0
21.02.03.0
33.02.01.0
obj = pd.Series(range(5), index=['a', 'a', 'b', 'b', 'c'])
obj
a    0
a    1
b    2
b    3
c    4
dtype: int64
obj.index.is_unique
False
obj['a']
a    0
a    1
dtype: int64
obj['c']
4
df=pd.DataFrame(np.random.randn(4,3),index=['a','a','b','b'])
df
012
a1.0952400.1370700.533132
a0.470992-0.038642-0.118522
b0.509320-0.0951651.565080
b1.551403-0.0280620.090268
df.loc['b']
012
b0.509320-0.0951651.565080
b1.551403-0.0280620.090268
df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],[np.nan, np.nan], [0.75, -1.3]],
index=['a', 'b', 'c', 'd'],
columns=['one', 'two'])
df
onetwo
a1.40NaN
b7.10-4.5
cNaNNaN
d0.75-1.3
df.sum()
one    9.25
two   -5.80
dtype: float64
df.sum(axis=1)
a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64
df.mean(axis='columns',skipna=False)
a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64
df.idxmin()
one    d
two    b
dtype: object
df.cumsum()
onetwo
a1.40NaN
b8.50-4.5
cNaNNaN
d9.25-5.8
obj=pd.Series(['a','a','b','c']*4)
obj.describe()
count     16
unique     3
top        a
freq       8
dtype: object
import pandas_datareader.data as web
all_data = {ticker: web.get_data_yahoo(ticker)
            for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']}
price = pd.DataFrame({ticker: data['Adj Close']
                      for ticker, data in all_data.items()})
volume = pd.DataFrame({ticker: data['Volume']
                       for ticker, data in all_data.items()})
returns = price.pct_change()
returns.tail()

                AAPL       IBM      MSFT      GOOG
Date
2019-08-13  0.042348  0.012452  0.020694  0.019205
2019-08-14 -0.029765 -0.033434 -0.030114 -0.027546
2019-08-15 -0.004981  0.005105 -0.002239  0.002551
2019-08-16  0.023595  0.013948  0.018327  0.008858
2019-08-19  0.026344  0.016709  0.012047  0.022393
returns['MSFT'].corr(returns['IBM'])
0.4905235623531012
 returns['MSFT'].cov(returns['IBM'])
8.766298066095883e-05
returns.MSFT.corr(returns.IBM)
0.4905235623531012
returns.corr()
          AAPL       IBM      MSFT      GOOG
AAPL  1.000000  0.384193  0.455895  0.461466
IBM   0.384193  1.000000  0.490524  0.404765
MSFT  0.455895  0.490524  1.000000  0.537158
GOOG  0.461466  0.404765  0.537158  1.000000
returns.cov()
          AAPL       IBM      MSFT      GOOG
AAPL  0.000267  0.000078  0.000108  0.000117
IBM   0.000078  0.000153  0.000088  0.000078
MSFT  0.000108  0.000088  0.000209  0.000121
GOOG  0.000117  0.000078  0.000121  0.000242
returns.corrwith(returns.IBM)
AAPL    0.384193
IBM     1.000000
MSFT    0.490524
GOOG    0.404765
dtype: float64
returns.corrwith(volume)
AAPL   -0.062747
IBM    -0.152642
MSFT   -0.090553
GOOG   -0.019246
dtype: float64
obj = pd.Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c',
'c'])
uniques=obj.unique()
uniques
array(['c', 'a', 'd', 'b'], dtype=object)
obj.value_counts()
a    3
c    3
b    2
d    1
dtype: int64
# Count how often each value occurs, without sorting by frequency.
# The Series method is used because the top-level pd.value_counts helper is
# deprecated in recent pandas releases.
pd.Series(obj.values).value_counts(sort=False)
c    3
d    1
b    2
a    3
dtype: int64
obj
0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object
mask=obj.isin(['b','c'])
mask
0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool
obj[mask]
0    c
5    b
6    b
7    c
8    c
dtype: object
to_match = pd.Series(['c', 'r', 'b', 'b', 'd', 'a'])
unique_vals = pd.Series(['c', 'b', 'a'])
pd.Index(unique_vals).get_indexer(to_match)


array([ 0, -1,  1,  1, -1,  2])
data = pd.DataFrame({'Qu1': [1, 3, 4, 3, 4],
                     'Qu2': [2, 3, 1, 2, 3],
                     'Qu3': [1, 5, 2, 4, 4]})
data
Qu1Qu2Qu3
0121
1335
2412
3324
4434
# Tabulate, for every column, how many times each value occurs; a value that
# never appears in a column gets NaN from the alignment and is filled with 0.
# Series.value_counts is passed directly because the top-level
# pd.value_counts helper is deprecated in recent pandas releases.
result=data.apply(pd.Series.value_counts).fillna(0)
result
Qu1Qu2Qu3
11.01.01.0
20.02.01.0
32.02.00.0
42.00.02.0
50.00.01.0








评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值