# 疾病预测和天气分析练习赛

import scipy.stats as st
import pandas as pd
pd.options.display.max_columns = None
pd.options.display.max_rows = None
path1 = "/home/kesci/input/liver_df9751/结构化数据训练营.csv"    # CSV loaded by the script section below


def median_filled_skew(frame, column='Weight\n体重', position=3):
    """Return the skewness of one column after median-filling missing cells.

    Every missing cell in *frame* is replaced by the median of *column*
    (NaN values are ignored when taking the median), then the column at
    integer *position* is handed to ``scipy.stats.skew``.

    Args:
        frame: DataFrame that contains *column*.
        column: Label of the column whose median is used as the fill value.
        position: Integer position of the column whose skewness is measured.
            NOTE(review): presumably position 3 is the weight column in the
            exercise CSV -- confirm against the file header.

    Returns:
        The sample skewness as computed by ``scipy.stats.skew``.
    """
    fill_value = frame[column].median()
    filled = frame.fillna(fill_value)
    return st.skew(filled.iloc[:, position].values)


if __name__ == "__main__":
    # The original listing used `df` without ever loading it from path1.
    df = pd.read_csv(path1)
    w = median_filled_skew(df)  # skewness of the median-filled column
    # Value recorded by the original author for this data: 0.7565543738808015
    print('%.4f' % w)



boxcox1p 变换：使用 `scipy.special.boxcox1p()`

import scipy.stats as st
import pandas as pd
from scipy.special import boxcox1p
pd.options.display.max_columns = None
pd.options.display.max_rows = None
path1 = "/home/kesci/input/liver_df9751/结构化数据训练营.csv"    # CSV loaded by the script section below


def boxcox1p_skew(frame, column='Weight\n体重', lam=0.1):
    """Return the skewness of *column* after median fill and a boxcox1p transform.

    Missing values in *column* are replaced by that column's median (NaN
    values are ignored when taking the median). The filled values are passed
    through ``scipy.special.boxcox1p`` with parameter *lam*, and the result is
    handed to ``scipy.stats.skew``.

    Args:
        frame: DataFrame that contains *column*.
        column: Label of the column to transform.
        lam: Lambda parameter forwarded to ``boxcox1p``.

    Returns:
        The sample skewness of the transformed values.
    """
    values = frame[column]
    filled = values.fillna(values.median())
    return st.skew(boxcox1p(filled.values, lam))


if __name__ == "__main__":
    # The original listing used `df` without ever loading it from path1.
    df = pd.read_csv(path1)
    lam = 0.1
    w = boxcox1p_skew(df, lam=lam)  # skewness after the transform
    # NOTE(review): the original listing quoted 0.7565543738808015 here; that
    # looks copied from the untransformed computation -- confirm before
    # relying on it.
    print('%.4f' % w)



import scipy.stats as st
import pandas as pd
import regex as re
pd.options.display.max_columns = None
pd.options.display.max_rows = None
path1 = "/home/kesci/input/liver_df9751/结构化数据训练营.csv"    # CSV loaded by the script section below
# NOTE(review): the original listing also carried a commented-out test-set path,
# "/home/kesci/inputver_df9751/结构化数据训练营测试集.csv"; its directory part
# looks truncated -- confirm the real location before using it.

# Patterns tried in order; each captures the text that follows a separator
# (U+2028 line separator first, then a plain newline) inside a header.
HEADER_PREFIX_PATTERNS = (r"\u2028(.+)", r"\n(.+)")


def strip_header_prefix(name):
    """Return the part of a column header that follows its first separator.

    The header is searched for a U+2028 line separator first and for a
    newline second; the text captured after the first separator that matches
    is returned. Headers containing neither separator are returned unchanged.

    Args:
        name: Original column header string.

    Returns:
        The shortened header, or *name* itself when no pattern matches.
    """
    for pattern in HEADER_PREFIX_PATTERNS:
        match = re.search(pattern, name)
        if match is not None:
            return match.group(1)
    return name


def knn_training_hits(data, n_neighbors=5):
    """Fit a KNN classifier on 体重/年龄 and count correct training predictions.

    The classifier is trained on the two feature columns with ``ALF`` as the
    label and then asked to predict the same rows it was trained on.

    Args:
        data: DataFrame with columns ``体重``, ``年龄`` and ``ALF`` that contain
            no missing values.
        n_neighbors: Number of neighbours used by the classifier.

    Returns:
        A ``(total, correct)`` tuple: the number of rows and how many of them
        were predicted with their own label.
    """
    # Imported here so the helpers above stay usable without scikit-learn.
    from sklearn.neighbors import KNeighborsClassifier

    features = data[['体重', '年龄']].to_numpy()
    labels = data['ALF'].to_numpy()
    neigh = KNeighborsClassifier(n_neighbors=n_neighbors)
    neigh.fit(features, labels)
    # One batch prediction instead of one predict() call per row.
    correct = int((neigh.predict(features) == labels).sum())
    return len(labels), correct


if __name__ == "__main__":
    # The original listing used `data` without ever loading it from path1.
    data = pd.read_csv(path1)

    # Rename the DataFrame columns to their shortened form.
    data.columns = [strip_header_prefix(name) for name in data.columns]

    feature1 = ['体重', '年龄', 'ALF']
    for name in feature1:
        # NOTE(review): this also median-fills the ALF label column, exactly
        # as the original listing did -- confirm that is intended.
        data[name] = data[name].fillna(data[name].median())
        print(data[name].values)

    total, cnt = knn_training_hits(data)
    # Prints: number of rows, correct predictions, wrong predictions.
    print(total, cnt, total - cnt)



• 点赞
• 评论
• 分享
x

海报分享

扫一扫，分享海报

• 收藏
• 手机看

分享到微信朋友圈

x

扫一扫，手机阅读

• 打赏

打赏

Jozky86

你的鼓励将是我创作的最大动力

C币 余额
2C币 4C币 6C币 10C币 20C币 50C币
• 一键三连

点赞Mark关注该博主, 随时了解TA的最新博文
04-24
10-16 51

04-06 3608
05-02
05-13