一、生成数据表
1.创建一个 Series(一维带索引的序列)
import pandas as pd
import numpy as np
# Build a one-dimensional Series from a list; np.nan marks the missing entry.
values = [1, 3, 6, np.nan, 44, 1]
s = pd.Series(values)
print(s)
2.生成index索引(行)
import pandas as pd
import numpy as np
# Six consecutive daily timestamps starting on 2016-01-01, usable as a row index.
dates = pd.date_range(start='20160101', periods=6)
print(dates)
3.生成数据表
import pandas as pd
import numpy as np
# 6x4 table of standard-normal samples: rows labelled by date, columns 'a'..'d'.
dates = pd.date_range(start='20160101', periods=6)
column_labels = ['a', 'b', 'c', 'd']
samples = np.random.randn(6, 4)
df = pd.DataFrame(data=samples, index=dates, columns=column_labels)
print(df)
4.输出行/列的索引
import pandas as pd
import numpy as np
# Build the demo table, then show its row labels and its column labels.
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
row_labels = df.index
print(row_labels)
col_labels = df.columns
print(col_labels)
5.输出值
import pandas as pd
import numpy as np
# Build the demo table and print its underlying values as a NumPy array.
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
raw_values = df.values
print(raw_values)
6.数据表描述
import pandas as pd
import numpy as np
# Build the demo table and print per-column summary statistics.
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
summary = df.describe()
print(summary)
7.按数据表索引排序
import pandas as pd
import numpy as np
# Sort the demo table by its labels and print the sorted result.
dates = pd.date_range('20160101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
# axis=0 sorts by row labels, axis=1 sorts by column labels.
# sort_index returns a new DataFrame (it does not sort in place), so the
# result must be kept; otherwise the unsorted table is printed.
df = df.sort_index(axis=1, ascending=False)
print(df)
8.按数据表值排序
import pandas as pd
import numpy as np
# Sort the demo table by the values in column 'a' and print the sorted result.
dates = pd.date_range('20160101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
# sort_values returns a new DataFrame (it does not sort in place), so the
# result must be kept; otherwise the unsorted table is printed.
df = df.sort_values(by='a')
print(df)
二、选择数据
1.直接通过 columns 和 index 选择
import pandas as pd
import numpy as np
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
# Select column 'a' by label (attribute access, df.a, is an alternative).
col_a = df['a']
print(col_a)
# Select the first three rows (positions 0-2) by slicing; the label slice
# df['20160101':'20160103'] is an alternative on this date index.
first_rows = df[0:3]
print(first_rows)
2.通过标签 loc 选择
import pandas as pd
import numpy as np
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
# Label-based selection: the row whose label is 2016-01-02.
# Other label-based forms:
#   df.loc[:, ['a', 'b']]           -> all rows, columns a and b
#   df.loc['20160102', ['a', 'b']]  -> columns a and b of that single row
selected_row = df.loc['20160102']
print(selected_row)
3.通过数字 iloc 选择
import pandas as pd
import numpy as np
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
# Position-based selection: rows 3-4 and columns 1-2 (end positions excluded).
sub_table = df.iloc[3:5, 1:3]
print(sub_table)
三、处理缺失值(NaN)
1.丢弃nan值
import pandas as pd
import numpy as np
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
# Plant two missing values so there is something to drop.
for row, col in ((0, 1), (1, 2)):
    df.iloc[row, col] = np.nan
# axis=0 drops rows. how='any' drops a row containing at least one NaN;
# how='all' would drop only rows that are entirely NaN.
df1 = df.dropna(axis=0, how='any')
print(df1)
2.填充nan值
import pandas as pd
import numpy as np
dates = pd.date_range(start='20160101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
# Plant two missing values so there is something to fill.
for row, col in ((0, 1), (1, 2)):
    df.iloc[row, col] = np.nan
# Replace every NaN with 0 in a new table; df itself keeps its NaNs.
df1 = df.fillna(value=0)
print(df1)
3.判断是否存在nan
import pandas as pd
import numpy as np
dates = pd.date_range('20160101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
# Plant two missing values so the check below has something to find.
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
# Boolean mask with the same shape as df: True where the value is missing.
df1 = df.isnull()
# Reduce the mask's underlying array to a single boolean. This reuses the
# mask instead of recomputing it, drops the redundant `== True`, and does not
# depend on how np.any() dispatches to a DataFrame.
whether = df1.values.any()
print(df1)
print(whether)  # True if at least one missing value exists
本文参考资料:【莫烦Python】系列教程