# pandas basics
# (The section title must be a comment: as a bare word it is evaluated as a
# name lookup and raises NameError before pandas is even imported.)
import pandas as pd

print(pd.__version__)

# Define a plain dict: each key is a column name, each value the column data.
mydataset = {
    'sites': ["Google", "Runoob", "WiKi"],
    'number': [1, 2, 3],
}
# Convert the dict into a DataFrame so it can be processed with pandas.
mydf = pd.DataFrame(mydataset)
print(mydf)
# --- Series basics ---

# A Series built from a list gets the default 0..n-1 integer index.
a = [1, 2, 3]
mysr = pd.Series(data=a, name="aha")
print(mysr)
print(mysr.loc[1])  # label 1 -> the second element

# Explicit string labels replace the default integer index.
b = ["Google", "Runoob", "WiKi"]
myvar = pd.Series(data=b, index=["x", "y", "z"])
print(myvar.loc['y'])

# Building from a dict: the keys become the index labels.
sites = {1: "Google", 2: "Runoob", 3: "WiKi"}
myvar2 = pd.Series(data=sites)
print(myvar2.loc[3])

# Passing index= alongside a dict keeps only the listed keys.
myvar3 = pd.Series(data=sites, index=[1, 2])
print(myvar3)
# --- Selecting rows and using a custom row index ---

data = {"calories": [420, 280, 390], "duration": [50, 40, 45]}

# Default integer index: a single label yields a Series (one row),
# a list of labels yields a DataFrame (several rows).
mydf4 = pd.DataFrame(data=data)
for _selector in (0, [0, 1]):
    print(mydf4.loc[_selector])

# Same columns, but with explicit row labels instead of 0..n-1.
mydf5 = pd.DataFrame(data=data, index=['row1', 'row2', 'row3'])
print(mydf5)
# Data cleaning
# (The section title must be a comment: as a bare word it is a valid
# identifier and raises NameError when the script runs.)
import pandas as pd

# Load the raw data; the path is relative to the current working directory.
df = pd.read_csv('./sss.csv')
# print(df)

# Print a single column and check whether it contains missing values:
# print(df['NUM_BEDROMMS'])
# print(df['NUM_BEDROMMS'].isnull())

# inplace=True modifies the original data and returns None, so do not
# assign the result when using it:
# df.dropna(inplace=True)
# print(df)

# Only consider one column when deciding which rows to drop:
# df3 = df.dropna(subset=['ST_NUM'])
# print(df3)

# Replace every missing value with a placeholder:
# df4 = df.fillna('666')
# print(df4)

# Replace missing values in a single column.
# Assign the result back instead of calling fillna(inplace=True) on
# df['PID']: the chained in-place form operates on an intermediate object
# and does not update df under pandas Copy-on-Write.
df['PID'] = df['PID'].fillna(123456)
print(df)

# Fill missing values with the mean of ST_NUM.
avg = df['ST_NUM'].mean()
# avg = df['ST_NUM'].median()
# avg = df['ST_NUM'].mode()  # note: mode() returns a Series, not a scalar
# NOTE(review): this fills the remaining NaNs in *every* column with the
# ST_NUM mean, not just ST_NUM itself -- confirm that is intended.
df.fillna(avg, inplace=True)
print(df)
import pandas as pd
# Normalising date strings that come in more than one format.
data = {
    "Date": ['2020/12/01', '2020/12/02', '20201226'],
    "duration": [50, 40, 45],
}
df = pd.DataFrame(data, index=['day1', 'day2', 'day3'])

# Parse each value on its own. Calling pd.to_datetime on the whole column
# infers one format from the first element ('%Y/%m/%d') on pandas >= 2.0 and
# then raises ValueError on '20201226'; element-wise parsing handles the
# mixed formats on both old and new pandas versions.
df['Date'] = df['Date'].apply(pd.to_datetime)
print(df)