Python pandas 和数据清理

pandas

import pandas as pd

# Report which pandas release is in use.
print(pd.__version__)

# Column-oriented source data: each key is a column label and each list
# holds that column's values.
mydataset = dict(
    sites=["Google", "Runoob", "WiKi"],
    number=[1, 2, 3],
)

# Wrap the dict in a DataFrame so it can be handled with the pandas API.
mydf = pd.DataFrame(data=mydataset)
print(mydf)

# Series built from a plain list: it receives the default 0..n-1 index
# and carries the given name.
a = [1, 2, 3]
mysr = pd.Series(data=a, name="aha")
print(mysr)
print(mysr.loc[1])  # label 1 of the default index, i.e. the second value

# Series built with explicit string labels instead of the default index.
b = ["Google", "Runoob", "WiKi"]
myvar = pd.Series(data=b, index=list("xyz"))
print(myvar.loc['y'])

# Series built from a dict: the dict keys become the index labels.
sites = dict(enumerate(["Google", "Runoob", "WiKi"], start=1))
myvar2 = pd.Series(data=sites)
print(myvar2.loc[3])

# Supplying index= together with a dict keeps only the listed keys.
myvar3 = pd.Series(data=sites, index=[1, 2])
print(myvar3)

# Two numeric columns; rows get the default integer index 0, 1, 2.
data = dict(
    calories=[420, 280, 390],
    duration=[50, 40, 45],
)
mydf4 = pd.DataFrame(data=data)

# A single label selects one row and returns it as a Series.
first_row = mydf4.loc[0]
print(first_row)

# A list of labels selects several rows and returns a DataFrame.
first_two_rows = mydf4.loc[[0, 1]]
print(first_two_rows)

# Same sample data, this time with caller-chosen row labels.
data = dict(
    calories=[420, 280, 390],
    duration=[50, 40, 45],
)
mydf5 = pd.DataFrame(data=data, index=[f"row{n}" for n in (1, 2, 3)])
print(mydf5)

 数据清理

import pandas as pd

# Load the raw table from a CSV located relative to the working directory.
df = pd.read_csv('./sss.csv')

# print(df)

# Print a single column and check which of its entries are null.
# print(df['NUM_BEDROMMS'])
# print(df['NUM_BEDROMMS'].isnull())

# Drop rows that contain nulls. With inplace=True the frame itself is
# modified and dropna() returns None, so its result must not be assigned.
# df.dropna(inplace=True)
# print(df)

# Only look at one column when deciding which rows to drop.
# df3 = df.dropna(subset=['ST_NUM'])
# print(df3)

# Replace every null in the frame with a placeholder value.
# df4 = df.fillna('666')
# print(df4)

# Fill the nulls of one column. The result is assigned back to the column
# instead of calling fillna(inplace=True) on the selection: an in-place call
# on df['PID'] acts on an intermediate object, which triggers a chained-
# assignment warning on pandas 2.x and leaves df unchanged once
# Copy-on-Write is active.
df['PID'] = df['PID'].fillna(123456)
print(df)

# Fill the nulls of ST_NUM with that column's mean. Only ST_NUM is touched;
# a frame-wide df.fillna(avg) would also write the ST_NUM mean into the
# nulls of every other, unrelated column.
avg = df['ST_NUM'].mean()
# avg = df['ST_NUM'].median()
# avg = df['ST_NUM'].mode()[0]
df['ST_NUM'] = df['ST_NUM'].fillna(avg)
print(df)
import pandas as pd

# Sample frame whose Date column mixes two string layouts
# ('YYYY/MM/DD' and 'YYYYMMDD').
data = {
    "Date": ['2020/12/01', '2020/12/02', '20201226'],
    "duration": [50, 40, 45],
}
df = pd.DataFrame(data, index=['day1', 'day2', 'day3'])

# Parse each value on its own. A single vectorised pd.to_datetime() call
# infers one format from the first element on pandas 2.x and then raises
# ValueError on '20201226'; per-element parsing accepts both layouts.
df['Date'] = df['Date'].map(pd.to_datetime)
print(df)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值