《Python数据分析与挖掘实战》P153:拉格朗日插值法填补缺失值
# -*- coding: utf-8 -*-
import pandas as pd
from scipy.interpolate import lagrange
# Workbook containing the raw data with missing cells.
inputfile = 'G:/学习资料/统计/chapter6/demo/data/missing_data.xls'
# Workbook that receives the data after the missing cells are filled.
outputfile = 'G:/学习资料/统计/chapter6/demo//tmp/missing_data_processed1.xls'

# header=None: the first sheet row is read as data, so columns get
# integer labels rather than names taken from the sheet.
data = pd.read_excel(io=inputfile, header=None)
def ployinterp_column(s, n, k=5):
    """Estimate the value at label ``n`` of ``s`` by Lagrange interpolation.

    The interpolation nodes are the integer labels ``n-k .. n-1`` and
    ``n+1 .. n+k``.  Labels that are absent from the index, or whose value
    is null, are dropped before the polynomial is fitted.

    Args:
        s: pandas Series addressed by integer labels.
        n: label of the entry to estimate.
        k: number of neighbouring labels taken on each side of ``n``.

    Returns:
        The Lagrange polynomial through the remaining neighbours,
        evaluated at ``n``.
    """
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels outside the index (negative labels near
    # the top of the column, labels past the end near the bottom).  Plain
    # s[window] raises KeyError for such labels on pandas >= 1.0.
    y = s.reindex(window)
    y = y[y.notnull()]
    return lagrange(list(y.index), list(y))(n)
# Fill every null cell, column by column, with its Lagrange-interpolated value.
for col in data.columns:
    # Compute the null mask once per column instead of once per row.
    # Taking it up front is safe: a cell is only written when its own row is
    # visited, so rows still to be visited keep their original null state.
    null_mask = data[col].isnull()
    for row in range(len(data)):
        if null_mask[row]:
            # Write through .loc on the frame itself.  Chained assignment
            # (data[col][row] = ...) may write to a temporary copy and leave
            # `data` unchanged.  data[col] is re-read on each call so values
            # filled earlier in this column take part in later interpolations.
            data.loc[row, col] = ployinterp_column(data[col], row)

# header=False is the documented way to omit the header row.
# NOTE(review): writing the legacy .xls format depends on the Excel writer
# engine available in the installed pandas -- confirm it is supported.
data.to_excel(outputfile, header=False, index=False)
遇到的问题:
1. ImportError: No module named 'xlrd'
解决办法:在cmd命令行中安装xlrd,命令:pip install xlrd
2.File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\pandas\core\serie