import pandas as pd
import numpy as np
# --- Creating Series and DataFrame objects ---------------------------------

# Series from a plain list that contains one missing value (NaN).
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print(s)

# Scalar 1 repeated over an explicit integer index 0..3.
t = pd.Series(1, index=list(range(4)))
print(t)

# Six consecutive daily timestamps starting at 2021-01-01.
dates = pd.date_range('20210101', periods=6)
print(dates)

# 6x4 frame of random normal samples, rows labelled by ``dates``.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list('abcd'),
)
print(df)

# No index/columns supplied, so pandas assigns default integer labels.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
print(df1)

# Dict-based construction: every key becomes a column; scalars are broadcast
# to the length of the array-like entries (4 rows here).
df2 = pd.DataFrame({
    'a': 1.,
    'b': pd.Timestamp('20130102'),
    'c': pd.Series(1, index=list(range(4)), dtype='float32'),
    'd': np.full(4, 3, dtype='int32'),
    'e': pd.Categorical(["test", "train", "test", "train"]),
    'f': 'foo',
})
print(df2)

# Inspect the frame: per-column dtypes, row labels, column labels, raw values.
print(df2.dtypes)
print(df2.index)
print(df2.columns)
print(df2.values)

# Summary statistics, transpose, and two kinds of sorting.
print(df2.describe())
print(df2.T)
print(df2.sort_index(axis=0, ascending=False))  # rows by label, descending
print(df2.sort_values(by='e'))                  # rows by values of column 'e'
import pandas as pd
import numpy as np
# --- Selecting data by label -----------------------------------------------

dates = pd.date_range('20210101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape(6, 4),
    index=dates,
    columns=list('ABCD'),
)

# A single column, via item access and via attribute access.
print(df['A'], df.A)

# Row slices: by integer position and by date-string labels.
print(df[0:3], df['20210101':'20210103'])

# ``.loc`` is label based: one row, all rows of some columns,
# a label-sliced rectangle, and one row restricted to two columns.
print(df.loc['20210101'])
print(df.loc[:, ['A', 'B']])
print(df.loc['20210101':'20210106', 'A':'C'])
print(df.loc['20210101', ['A', 'B']])
import pandas as pd
import numpy as np
# --- Selecting data by integer position ------------------------------------

dates = pd.date_range('20210101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

# ``.iloc`` is purely positional (0-based, end-exclusive slices).
print(df.iloc[3])                      # fourth row
print(df.iloc[3, 1])                   # single cell: row 3, column 1
print(df.iloc[3:5, 1])                 # rows 3-4 of column 1
print(df.iloc[3:5, 0:3])               # rows 3-4, columns 0-2
print(df.iloc[[0, 1, 3], [0, 1, 3]])   # explicit row / column positions
import pandas as pd
import numpy as np
# --- Mixed selection: rows by position, columns by label -------------------

dates = pd.date_range('20210101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# ``DataFrame.ix`` was removed in pandas 1.0.  To take the first three rows
# by position and columns 'A'/'B' by label, translate the positions into
# labels with ``df.index[:3]`` and use the label-based ``.loc`` indexer.
first_three_ab = df.loc[df.index[:3], ['A', 'B']]
print(first_three_ab)
import pandas as pd
import numpy as np
# --- Boolean filtering -----------------------------------------------------

dates = pd.date_range('20210101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

# Keep only the rows whose 'A' value is greater than 8.
print(df[df.A > 8])

# Element-wise comparison yields a frame of booleans.
print(df > 8)

# Column 'A', restricted to the rows where 'B' is greater than 5.
print(df.loc[:, 'A'][df.B > 5])
import pandas as pd
import numpy as np
# --- Assigning values ------------------------------------------------------

dates = pd.date_range('20210101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# Overwrite a single cell by position.
df.iloc[2, 2] = 1111
print(df)

# Overwrite a single cell by label.
df.loc['20210101', 'B'] = 2222
print(df)

# Zero out column 'A' wherever it is positive.  This must be ONE ``.loc``
# call with (row mask, column): the chained form ``df.loc[:, 'A'][mask] = 0``
# assigns into an intermediate object, which raises SettingWithCopyWarning
# and leaves ``df`` unchanged when Copy-on-Write is active.
df.loc[df.A > 0, 'A'] = 0
print(df)

# Add a new column filled with NaN.
df['F'] = np.nan
print(df)

# Add a new column from a Series; values are aligned on the index.
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=df.index)
print(df)
import pandas as pd
import numpy as np
# --- Handling missing data -------------------------------------------------

dates = pd.date_range('20210101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# Punch two holes into the data.
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)

# Drop every column (axis=1) that contains at least one NaN.
print(df.dropna(axis=1, how='any'))

# Replace every NaN with 0.
print(df.fillna(value=0))

# Boolean mask marking the missing cells.
print(df.isnull())

# "Is anything missing at all?"  Reduce the mask's underlying array directly
# instead of calling ``np.any`` on the DataFrame and comparing with ``True``.
has_missing = df.isnull().values.any()
print(has_missing)
import pandas as pd
import os
# --- File I/O --------------------------------------------------------------

# Read the CSV from a fixed absolute path, display it, then write the frame
# back out as a pickle file at a relative path.
data = pd.read_csv('D:/demo/student.csv')
print(data)
data.to_pickle('student.pickle')
import pandas as pd
import numpy as np
# --- Concatenation basics --------------------------------------------------

# Three 3x4 float frames filled with 0, 1 and 2 respectively.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list('abcd'))
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=list('abcd'))
print(df1)
print(df2)
print(df3)

frames = [df1, df2, df3]

# axis=0 stacks rows (original row labels are kept, hence repeated);
# axis=1 places the frames side by side.
res1 = pd.concat(frames, axis=0)
res2 = pd.concat(frames, axis=1)
print(res1)
print(res2)

# ignore_index=True renumbers the stacked rows 0..n-1.
res3 = pd.concat(frames, axis=0, ignore_index=True)
print(res3)
import pandas as pd
import numpy as np
# --- Concatenation with differing columns: outer vs. inner join -------------

df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'), index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)), columns=list('bcde'), index=[2, 3, 4])
print(df1)
print(df2)

# Default join is 'outer': the union of the columns, with NaN where a frame
# has no such column.
res1 = pd.concat([df1, df2])
print(res1)

# join='inner' keeps only the columns common to both frames.
res2 = pd.concat([df1, df2], join='inner', ignore_index=True)
print(res2)
import pandas as pd
import numpy as np
# --- Side-by-side concatenation restricted to df1's row labels --------------

df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# The ``join_axes`` keyword of ``pd.concat`` was removed in pandas 1.0.
# The equivalent is to reindex the other frame onto df1's index first, so
# the result has exactly df1's rows (NaN where df2 has no such label).
res1 = pd.concat([df1, df2.reindex(df1.index)], axis=1)
print(res1)
import pandas as pd
import numpy as np
# --- Appending rows ---------------------------------------------------------

df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])

# ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
# replacement.  Stack the three frames and renumber the rows.
res1 = pd.concat([df1, df2, df3], ignore_index=True)
print(res1)

s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
print(s1)

# Appending a Series as one new row: turn it into a 1-row frame whose
# columns are the Series' index labels, then concatenate.
res2 = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res2)
import pandas as pd
# --- Merge on a single key column -------------------------------------------

left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

# Rows are paired wherever the 'key' values match.
res1 = pd.merge(left, right, on='key')
print(res1)
import pandas as pd
# --- Merge on two key columns with each join type ---------------------------

left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

# Same inputs and keys; only the ``how`` strategy differs.
res1 = pd.merge(left, right, on=['key1', 'key2'], how='inner')   # matching pairs only
res2 = pd.merge(left, right, on=['key1', 'key2'], how='outer')   # all pairs from both
res3 = pd.merge(left, right, on=['key1', 'key2'], how='left')    # every left row
res4 = pd.merge(left, right, on=['key1', 'key2'], how='right')   # every right row
print(res1)
print(res2)
print(res3)
print(res4)
import pandas as pd
# --- Merge with an indicator column -----------------------------------------

df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
print(df1)
print(df2)

# indicator=True adds a '_merge' column telling where each row came from
# ('left_only', 'right_only' or 'both').
res1 = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
print(res1)
import pandas as pd
# --- Merge on the index instead of a column ---------------------------------

left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C1', 'C2'], 'D': ['D0', 'D1', 'D2']},
    index=['K0', 'K2', 'K3'],
)
print(left)
print(right)

# Use the row labels of both frames as join keys; 'outer' keeps every label.
res1 = pd.merge(left, right, left_index=True, right_index=True, how='outer')
print(res1)
import pandas as pd
# --- Merge with overlapping column names ------------------------------------

boys = pd.DataFrame({'k': ['k0', 'k1', 'k2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['k0', 'k0', 'k3'], 'age': [4, 5, 6]})

# Both frames have an 'age' column; ``suffixes`` disambiguates them in the
# merged result.
res1 = pd.merge(
    boys,
    girls,
    on='k',
    suffixes=['_boys', '_girls'],
    how='outer',
)
print(res1)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# --- Plotting a Series ------------------------------------------------------

# Running sum of 1000 random normal samples (a random walk), indexed 0..999.
data1 = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()

# Draw it as a line plot and display the figure.
data1.plot()
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# --- Plotting a DataFrame ---------------------------------------------------

# Four independent random walks (columns A-D), 1000 steps each.
data2 = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
).cumsum()
print(data2.head())

# One line per column on a shared axes, then display the figure.
data2.plot()
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# --- Scatter plots sharing one axes -----------------------------------------

# Four independent random walks (columns A-D), 1000 steps each.
data2 = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
).cumsum()

# The first scatter call creates the axes; the second reuses it through
# ``ax=ax`` so both point clouds appear in the same figure.
ax = data2.plot.scatter(x='A', y='B', label='Class1')
data2.plot.scatter(x='A', y='C', label='Class2', ax=ax)
plt.show()
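# End of examples. (A stray Markdown code fence and the blog-editor
# placeholder "insert code snippet here" were left at this point; they are
# not Python and made the file fail to parse.)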