import pandas as pd
from scipy.interpolate import lagrange
# Load the GBK-encoded table that contains the gaps to be filled.
data = pd.read_csv('missing_data.csv', sep=',', encoding='gbk')

# Show the rows that contain missing cells. The mask is the 2-D boolean array
# of null flags, used as a row selector.
# NOTE(review): with a 2-D mask a row holding several gaps appears to be
# listed once per gap -- confirm this duplication is intended.
data1 = data[data.isnull().values]
print(data1)
# Custom interpolation function for a single column vector.
def ploy(s, n, k=6):
    """Estimate the value at label ``n`` of ``s`` by Lagrange interpolation.

    The interpolating polynomial is built from the non-null values among the
    ``k`` labels before ``n`` and the ``k`` labels after it, then evaluated
    at ``n``.

    Args:
        s: pandas Series with integer labels (the window is computed with
            integer arithmetic on ``n``).
        n: Label of the entry to estimate.
        k: Number of neighbouring labels taken on each side of ``n``.

    Returns:
        The interpolated value at ``n``, or NaN when the window contains no
        known value to interpolate from.
    """
    # Labels of the k neighbours on each side; n itself is left out.
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # Near either end of the series part of the window does not exist.
    # ``s[window]`` raises KeyError for such labels on current pandas, whereas
    # reindex() returns NaN for them, and the NaNs are dropped just below.
    y = s.reindex(window)
    y = y[y.notnull()]
    if y.empty:
        # Nothing to fit a polynomial through; leave the gap as missing.
        return float('nan')
    return lagrange(y.index, list(y))(n)
# Fill every missing cell, column by column, with the value interpolated from
# its neighbours, then report the remaining null counts and export the result.
# Assumes `data` carries integer row labels, since ploy() does integer
# arithmetic on the label it is given.
for i in data.columns:
    # The gaps of a column are located once up front; filling one cell does
    # not change which of the later cells are missing.
    for j in data.index[data[i].isnull()]:
        # Write through .loc so the value lands in `data` itself. The chained
        # form `data[i][j] = ...` assigns into an intermediate object and can
        # leave `data` unchanged (SettingWithCopy / Copy-on-Write).
        data.loc[j, i] = ploy(data[i], j)
print(data.isnull().sum())
data.to_excel('misslagrange.xlsx')
import pandas as pd
import numpy as np
# Load the two GBK-encoded tables and report their dimensions.
ele = pd.read_csv('ele_loss.csv', sep=',', encoding='gbk')
ala = pd.read_csv('alarm.csv', sep=',', encoding='gbk')
print(ele.shape)
print(ala.shape)

# Inner join: keep only the rows whose ('ID', 'date') pair occurs in both tables.
ele_ala = ele.merge(ala, how='inner', on=['ID', 'date'])
print(ele_ala.shape)
print(ele_ala)
import pandas as pd
# Load the GBK-encoded table whose columns are standardised further down.
model = pd.read_csv(
    'model.csv',
    sep=',',
    encoding='gbk',
)
def StandardScale(data):
    """Return ``data`` standardised to z-scores.

    The mean of ``data`` is subtracted and the result is divided by
    ``data.std()``. The input object is not modified; a new object is
    returned.
    """
    centre = data.mean()
    spread = data.std()
    return (data - centre) / spread
print(model)

# Standardise the three columns first, then print the head of each result.
data1, data2, data3 = (
    StandardScale(model[column_name])
    for column_name in ('电量趋势下降指标', '线损指标', '是否窃漏电')
)
for scaled in (data1, data2, data3):
    print('标准差标准化后:\n', scaled.head())