# 一、对篮球运动员分类 (Part 1: classify basketball players by clustering)
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans

# Cluster the rows of data.csv into five groups with K-Means and show the
# result as a scatter plot of the first two feature columns, coloured by
# cluster label.

# Configure fonts up front, before any text is created: SimHei is needed to
# render the Chinese axis labels, and unicode_minus=False keeps minus signs on
# the tick labels renderable with that font.
plt.rcParams["font.sans-serif"] = ["SimHei"]
plt.rcParams["axes.unicode_minus"] = False

t1 = pd.read_csv(r"G:\大数据实验数据库\3.大数据实验数据\data.csv")
# Every column except the first is used as a feature.
# NOTE(review): the first column is presumably an id/name column - confirm.
t2 = t1.iloc[:, 1:]

# random_state makes the cluster assignment reproducible between runs;
# n_init is pinned explicitly because the library default differs between
# scikit-learn releases.
cls = KMeans(n_clusters=5, n_init=10, random_state=0)
pre = cls.fit_predict(t2)
print(pre)

# Plot the first feature against the second, one colour per cluster label.
x = t2.iloc[:, 0]
y = t2.iloc[:, 1]
z = pre
plt.scatter(x, y, c=z, marker="o")
plt.xlabel("每分钟助攻次数")  # label text: "assists per minute"
plt.ylabel("每分钟得分次数")  # label text: "points per minute"
plt.show()
# 二、P217实训2 (Part 2: practical exercise 2 from page 217)
import pandas as pd

# Load the GBK-encoded credit-card table and drop rows in three successive
# passes. Each pass removes the rows that satisfy ALL conditions of its rule,
# then prints how many rows remain.
t2 = pd.read_csv(r"G:\大数据实验数据库\3.大数据实验数据\credit_card.csv", encoding="GBK")
print(t2.columns)

# Pass 1: the five record flags all equal 1 and "瑕疵户" equals 2.
rule1_hit = (
    t2["逾期"].eq(1)
    & t2["呆账"].eq(1)
    & t2["强制停卡记录"].eq(1)
    & t2["退票"].eq(1)
    & t2["拒往记录"].eq(1)
    & t2["瑕疵户"].eq(2)
)
t2 = t2.loc[~rule1_hit]
print(len(t2))

# Pass 2: evaluated on the rows that survived pass 1.
rule2_hit = (
    t2["呆账"].eq(1)
    & t2["强制停卡记录"].eq(1)
    & t2["退票"].eq(1)
    & t2["拒往记录"].eq(2)
)
t2 = t2.loc[~rule2_hit]
print(len(t2))

# Pass 3: "频率" equals 5 while "月刷卡额" is anything other than 1.
rule3_hit = t2["频率"].eq(5) & t2["月刷卡额"].ne(1)
t2 = t2.loc[~rule3_hit]
print(len(t2))

# Bare expression: displays the final frame when run in a notebook cell.
t2