py数据分析课程

pandas常用

数据文件的读写

import pandas as pd

df1 = pd.read_excel(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\stock_data.xlsx',"Sheet1")

#1,读取excel
df2 = pd.read_excel('F:/a_houdashi_work/bbb_python学习/python高薪全能班/pandas/课程代码/stock_data.xlsx',"Sheet1")

#2,读取处理异常值
def convert_price(t):
    if t == 'n.a.':
        return 250
    return t

df3 = pd.read_excel(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\stock_data.xlsx',
                    "Sheet1",converters={
                                            'price':convert_price
                                            })
df2.dtypes
df3.dtypes
#3,写出数据到excel
df2.to_excel(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\w1.xlsx',
             sheet_name='weather',index=False)

df3.to_excel(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\w2.xlsx',
             sheet_name='weather',index=False,startrow=3,startcol=4)

#多4,个写入
with pd.ExcelWriter(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\combine.xlsx') as writer:
    df1.to_excel(writer,sheet_name='df1',index=False)
    df2.to_excel(writer,sheet_name='df2')

缺失数据的补偿方法

 

import pandas as pd

#1,导入数据,处理字段为时间类型
df4 = pd.read_csv(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\weather_data.csv')
df4 = pd.read_csv(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\weather_data.csv'
                  ,parse_dates=['day'])

df4.head(10)
df4.info()
type(df4.day)
type(df4.day[0])

#2,手动补缺
df4.iloc[0,1]
df4.iloc[0,0]=44

#3,使用fillna填充空值
new_df4 = df4.fillna(0)
new_df4 = df4.fillna(
                     {
                      'temperature' :250,
                      'event':'No Event',
                      'windspeed':'77'
                    
                      }
                     )
#ffill填充
new_df4 = df4.fillna(method='ffill')
new_df4 = df4.fillna(method='bfill')
new_df4 = df4.fillna(method='bfill',axis='columns')
new_df4 = df4.fillna(method='bfill',limit=2) #空值往下填充几个

#4,使用finterpolate填充空值,拟合过渡器
newdf = df4.interpolate()
newdf = df4.interpolate(method="time")

#5,删除法 dropna()
new_df = df4.dropna()
new_df = df4.dropna(thresh=3) #只要有3个字段有数据就留下

替换

 

import pandas as pd
import numpy  as np


#1,导入数据,处理字段为时间类型

df5 = pd.read_csv(r'F:\a_houdashi_work\bbb_python学习\python高薪全能班\pandas\课程代码\weather_data.csv'
                  ,parse_dates=['day'])

#1,单值替换
new_df5 = df5.replace(-99999,value=np.NAN)
new_df5.head(10)

df5.iloc[4,2]=-8888

new_df5 = df5.replace(to_replace=[-99999,-8888],value=np.NAN)
new_df5.head()

#2,替换专门列
new_df5 = df5.replace(
                      {
                       'event':'0'
                       },np.NAN
                      )
type(df5.event[0])
new_df5.head()

#3,使用map表替换
newdf = df5.replace(
                    {
                     -99999:np.nan,
                     'Sunny':'Sunny222'
                     }
                    
                     )
newdf 

#4,正则替换法
newdf = df5.replace(
                   {
                    'temperature':'[A-Za-z]'
                    },'',regex=True
                   )
newdf
newdf.iloc[3,1] ='abcd'

newdf2 = newdf.replace(
                   {
                    'temperature':'[A-Za-z]'
                    },'',regex=True
                   )
newdf2
#5,列表替换
df = pd.DataFrame(
                  {
                   'score':['a','b','c','d']
                   }
                  )
df = df.replace(['a','b','c','d'],[5,4,3,2])

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值