#实训1
from scipy.interpolate import lagrange
import pandas as pd
# Load the table for exercise 1. header=None tells pandas the file has no
# header row, so the columns get integer labels; the file is decoded as GBK.
data=pd.read_csv("C:\\data\\missing_data.csv",encoding='gbk',header=None)
def ployinterp_column(s, n, k=5):
    """Estimate the value of ``s`` at label ``n`` by Lagrange interpolation.

    The interpolation nodes are the (up to) ``k`` labels before ``n`` and the
    ``k`` labels after it; ``n`` itself is excluded. Nodes whose value is
    missing, or whose label does not exist in ``s``, are ignored.

    Args:
        s: pandas Series whose index labels serve as x-coordinates
            (assumes unique integer labels, e.g. the default RangeIndex).
        n: Integer label at which to interpolate.
        k: Number of neighbouring labels taken on each side of ``n``.

    Returns:
        The interpolated value at ``n``, or NaN when no neighbour in the
        window holds a value.
    """
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() turns labels that fall outside the series (near either end)
    # into NaN, which is filtered out below, instead of failing the lookup.
    y = s.reindex(window)
    y = y[y.notnull()]
    if y.empty:
        # Nothing to interpolate from; leave the value missing rather than
        # letting the empty polynomial evaluate to 0.
        return float("nan")
    return lagrange(list(y.index), list(y))(n)
# Replace every missing cell with a Lagrange-interpolated value, column by
# column. Each fill sees the values already filled earlier in the same column.
for i in data.columns:
    # The labels of the missing rows are computed once per column; filling
    # one row never changes whether another row is missing.
    for j in data.index[data[i].isnull()]:
        # .loc writes into `data` itself. The chained form data[i][j] = ...
        # assigns through an intermediate Series, which pandas does not
        # guarantee to propagate back to the frame.
        data.loc[j, i] = ployinterp_column(data[i], j)
# Count of values still missing per column (bare expression: only displayed
# when run interactively).
data.isnull().sum()
# Exercise 2: combine the electricity-loss table with the alarm table.
data2 = pd.read_csv("C:\\data\\ele_loss.csv", encoding='gbk')
data3 = pd.read_csv("C:\\data\\alarm.csv", encoding='gbk')

# Bare expressions: the shapes are only displayed when run interactively.
data2.shape
data3.shape

# Key-based join on the two shared columns (default inner join).
df = data2.merge(data3, on=["ID", "date"])

# Side-by-side concatenation, aligning rows on the index.
df2 = pd.concat([data2, data3], join='inner', axis=1)
df3 = pd.concat([data2, data3], join='outer', axis=1)

# Stacked concatenation, aligning on column names.
df4 = pd.concat([data2, data3], join='outer', axis=0)
df5 = pd.concat([data2, data3], join='inner', axis=0)
# Exercise 3: standardize the indicator columns of the model table.
# The file is decoded as GBK; its first row supplies the column names.
model=pd.read_csv("C:\\data\\model.csv",encoding='gbk')
def StandarScale(data):
    """Return ``data`` standardized to zero mean and unit standard deviation.

    Computes ``(data - data.mean()) / data.std()``. With pandas objects
    ``std()`` is the sample standard deviation (ddof=1). The input is not
    modified. A constant input has zero standard deviation, so the result
    is NaN.

    Args:
        data: pandas Series (or DataFrame, standardized column-wise).

    Returns:
        A new object of the same shape holding the z-scores.
    """
    return (data - data.mean()) / data.std()
# Standardize each indicator selected by column name. The results are not
# stored, so they are only visible when run interactively.
StandarScale(model["电量趋势下降指标"])
StandarScale(model["线损指标"])
StandarScale(model['告警类指标'])
StandarScale(model['是否窃漏电'])

# Standardize the first four columns selected by position, in order, and
# keep each result under its own name.
std1, std2, std3, std4 = (
    StandarScale(model.iloc[:, position]) for position in range(4)
)

# Put the four standardized columns side by side in one frame.
model_std = pd.concat([std1, std2, std3, std4], axis=1)
#第一题
import numpy as np
import pandas as pd
# Sample values passed to the three scaling functions defined below.
# This rebinds the module-level name `data` to a Series.
data=pd.Series([200,300,400,600,1000])
def MinMaxScale(data):
    """Return ``data`` rescaled linearly so its minimum is 0 and maximum is 1.

    Computes ``(data - min) / (max - min)``. The input is not modified.
    When all values are equal the range is zero and the result is NaN.

    Args:
        data: pandas Series (or DataFrame, scaled column-wise).

    Returns:
        A new object of the same shape with values in ``[0, 1]``.
    """
    lowest = data.min()
    span = data.max() - lowest
    return (data - lowest) / span
def StandardScale(data):
    """Return the z-scores of ``data``: ``(data - mean) / std``.

    With pandas objects ``std()`` is the sample standard deviation (ddof=1).
    The input is not modified. A constant input has zero standard deviation,
    so the result is NaN.

    Args:
        data: pandas Series (or DataFrame, standardized column-wise).

    Returns:
        A new object of the same shape with zero mean and unit standard
        deviation.
    """
    centered = data - data.mean()
    return centered / data.std()
def DecimalScale(data):
    """Return ``data`` divided by a power of ten (decimal scaling).

    The exponent is ``ceil(log10(max(|data|)))``, so the scaled magnitudes
    are at most 1. When the largest magnitude is an exact power of ten, the
    largest scaled magnitude is exactly 1. The input is not modified.

    Args:
        data: pandas Series (or DataFrame) of numeric values.

    Returns:
        A new object of the same shape holding ``data / 10**exponent``.
    """
    largest_magnitude = data.abs().max()
    exponent = np.ceil(np.log10(largest_magnitude))
    return data / (10 ** exponent)
# Apply each scaling function to the sample series. The results are not
# stored, so they are only visible when run interactively.
MinMaxScale(data)
StandardScale(data)
DecimalScale(data)
# Question 2: split the prices into three equal-width bins.
price = pd.Series(
    [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215]
)
cut = pd.cut(x=price, bins=3)
# Number of prices per bin (bare expression: only displayed when run
# interactively).
cut.value_counts()
#第三题
def Delete_Fill(data):
    """Deduplicate ``data`` and fill its missing values, all in place.

    Steps, in order:
      1. Drop rows that duplicate an earlier row.
      2. Drop every column whose content equals that of an earlier column
         (the first of each group of identical columns is kept).
      3. Fill remaining missing values in numeric columns with that
         column's median.

    Args:
        data: pandas DataFrame to clean; it is modified in place
            (assumes unique column labels).

    Returns:
        None.
    """
    # Row deletion.
    data.drop_duplicates(inplace=True)

    # Column deletion: compare each column only with the ones after it.
    columns = list(data.columns)
    dup_cols = []
    for position, kept in enumerate(columns):
        if kept in dup_cols:
            # Anything equal to this column was already matched against
            # the earlier column it duplicates.
            continue
        for candidate in columns[position + 1:]:
            if candidate not in dup_cols and data[kept].equals(data[candidate]):
                dup_cols.append(candidate)
    data.drop(dup_cols, axis=1, inplace=True)

    # Median fill. numeric_only=True keeps non-numeric columns from making
    # median() raise; such columns are left unfilled.
    data.fillna(data.median(numeric_only=True), inplace=True)
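# pandas简单操作 (page title: "Simple pandas operations")
# 最新推荐文章于 2024-06-18 14:40:42 发布 (web-page footer: "latest recommended article published 2024-06-18 14:40:42")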