pandas简单操作

#实训1
from scipy.interpolate import lagrange
import pandas as pd
# Load the raw table (GBK-encoded). header=None: the file has no header row,
# so pandas labels the columns with integers.
data=pd.read_csv("C:\\data\\missing_data.csv",encoding='gbk',header=None)
def ployinterp_column(s, n, k=5):
    """Estimate the value at label ``n`` of ``s`` by Lagrange interpolation.

    The polynomial is fitted through the non-null values found among the
    ``k`` labels before ``n`` and the ``k`` labels after it.

    Args:
        s: Series indexed by consecutive integer labels.
        n: Label of the entry to estimate.
        k: Number of neighbours taken on each side of ``n``.

    Returns:
        The interpolated value at ``n``, or NaN when no neighbouring value
        is available to fit a polynomial.
    """
    neighbours = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels that fall outside the index (which
    # happens near either end of the column); plain ``s[neighbours]`` raises
    # KeyError for them on current pandas versions.
    y = s.reindex(neighbours)
    y = y[y.notnull()]
    if y.empty:
        return float("nan")
    return lagrange(list(y.index), list(y))(n)
# Replace every missing cell with its Lagrange-interpolated estimate.
for i in data.columns:
    # The row labels to fill are collected once per column; filling a cell
    # never changes whether a later cell is missing.
    for j in data.index[data[i].isnull()]:
        # Write through .loc: the chained form data[i][j] = ... may assign to
        # a temporary copy and is a no-op under pandas Copy-on-Write.
        data.loc[j, i] = ployinterp_column(data[i], j)
# Remaining missing values per column (expected to be all zeros).
data.isnull().sum()
# Exercise 2: combine the two tables by key-based merge and by concatenation.
data2 = pd.read_csv("C:\\data\\ele_loss.csv", encoding='gbk')
data3 = pd.read_csv("C:\\data\\alarm.csv", encoding='gbk')
data2.shape
data3.shape
# Key-based join on the shared "ID" and "date" columns.
df = data2.merge(data3, on=["ID", "date"])
# Side-by-side concatenation (axis=1): inner first, then outer.
df2, df3 = (
    pd.concat([data2, data3], axis=1, join=how) for how in ('inner', 'outer')
)
# Stacked concatenation (axis=0): outer first, then inner.
df4, df5 = (
    pd.concat([data2, data3], axis=0, join=how) for how in ('outer', 'inner')
)

# Exercise 3
# Load the model table from a GBK-encoded CSV file.
model=pd.read_csv("C:\\data\\model.csv",encoding='gbk')
def StandarScale(data):
    """Return ``data`` shifted by its mean and divided by its standard deviation.

    The mean and standard deviation are the ones reported by ``data.mean()``
    and ``data.std()``; the input object is not modified.
    """
    centre = data.mean()
    spread = data.std()
    return (data - centre) / spread
# Standardise the four indicator columns selected by name
# (results are not stored).
StandarScale(model["电量趋势下降指标"])
StandarScale(model["线损指标"])
StandarScale(model['告警类指标'])
StandarScale(model['是否窃漏电'])
# Standardise the first four columns by position and keep the results.
std1, std2, std3, std4 = (
    StandarScale(model.iloc[:, pos]) for pos in range(4)
)
# Assemble the standardised columns side by side.
model_std = pd.concat([std1, std2, std3, std4], axis=1)


#第一题
import numpy as np
import pandas as pd
# Sample values used to demonstrate the three normalisation functions.
data = pd.Series([200, 300, 400, 600, 1000])
def MinMaxScale(data):
    """Rescale ``data`` linearly so its minimum maps to 0 and its maximum to 1.

    Accepts a Series, or a DataFrame (each column is scaled independently).
    A constant input (max == min) is mapped to zeros rather than to the NaN
    that dividing 0 by 0 would produce.

    Returns:
        A new object of the same shape; the input is not modified.
    """
    low = data.min()
    span = data.max() - low
    if isinstance(span, pd.Series):
        # DataFrame input: guard each constant column separately.
        span = span.where(span != 0, 1)
    elif span == 0:
        span = 1
    return (data - low) / span
def StandardScale(data):
    """Standardise ``data``: subtract its mean, then divide by its standard deviation.

    Uses ``data.mean()`` and ``data.std()``; returns a new object and leaves
    the input untouched.
    """
    return data.sub(data.mean()).div(data.std())
def DecimalScale(data):
    """Normalise ``data`` by decimal scaling: divide by a power of ten.

    The exponent is the smallest integer ``j`` for which every scaled value
    satisfies ``|v / 10**j| < 1``. Accepts a Series, or a DataFrame (each
    column gets its own exponent). All-zero input is returned as zeros.

    Returns:
        A new object of the same shape; the input is not modified.
    """
    peak = data.abs().max()
    with np.errstate(divide="ignore"):
        # floor + 1 (not ceil) so an exact power of ten such as 1000 is
        # scaled to 0.1 rather than 1.0. log10(0) is -inf; handled below.
        digits = np.floor(np.log10(peak)) + 1
    # All-zero (or all-NaN) input has no finite exponent: divide by 10**0.
    digits = np.where(np.isfinite(digits), digits, 0.0)
    # Guard against log10 rounding just below an exact power of ten.
    digits = np.where(peak / 10.0 ** digits >= 1, digits + 1, digits)
    scale = 10.0 ** digits
    if isinstance(peak, pd.Series):
        # DataFrame input: restore column labels so the division aligns.
        scale = pd.Series(scale, index=peak.index)
    return data / scale
# Apply each of the three normalisation functions to the sample series.
# The results are not stored.
MinMaxScale(data)
StandardScale(data)
DecimalScale(data)

# Question 2: split the prices into three bins with pd.cut and count each bin.
price = pd.Series([5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215])
cut = pd.cut(price, bins=3)
cut.value_counts()

# Question 3
def Delete_Fill(data):
    """Clean ``data`` in place: drop duplicates, then fill gaps with medians.

    Steps, all applied to the caller's DataFrame:
      1. drop duplicated rows;
      2. drop every column whose values equal those of an earlier column;
      3. fill remaining missing values with the median of their column
         (numeric columns only).

    Args:
        data: DataFrame to clean; it is modified in place. Column labels
            are assumed to be unique.

    Returns:
        None.
    """
    # Row de-duplication.
    data.drop_duplicates(inplace=True)

    # Column de-duplication: compare each column with the ones after it and
    # mark the later one whenever the two hold identical values.
    labels = list(data.columns)
    duplicated = []
    for pos, kept in enumerate(labels):
        if kept in duplicated:
            # Already marked; its own duplicates were found via the earlier
            # column it matches.
            continue
        for other in labels[pos + 1:]:
            if other not in duplicated and data[kept].equals(data[other]):
                duplicated.append(other)
    data.drop(columns=duplicated, inplace=True)

    # Median fill. numeric_only=True skips text columns, for which a median
    # is undefined and which make median() raise on recent pandas versions.
    data.fillna(data.median(numeric_only=True), inplace=True)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值