数据分析模型总结
1.逻辑回归
from sklearn.linear_model import LogisticRegression

# L1-penalised logistic regression. It is fitted on the *_bal training data
# (presumably the class-balanced split -- confirm against the preprocessing
# step) and then used to label the held-out test features.
clf = LogisticRegression(
    penalty='l1',
    solver='saga',     # saga is one of the solvers that accepts penalty='l1'
    C=0.1,
    tol=0.1,
    random_state=0,    # fixed seed so repeated runs give the same fit
)
clf.fit(X_train_bal, y_train_bal)
y_pred = clf.predict(X_test)
2.K近邻法(knn)
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier that votes over the 4 closest training
# samples. p is the power of the Minkowski distance: p=1 is Manhattan
# distance, p=2 (the library default) is Euclidean distance. It is written
# as a literal so the snippet runs without a separately defined `p`;
# change it to 1 to use Manhattan distance instead.
knn = KNeighborsClassifier(n_neighbors=4, p=2)
knn.fit(X_train_bal, y_train_bal)
y_pred = knn.predict(X_test)
3.套索回归(Lasso回归)
from sklearn.linear_model import Lasso

# Lasso (L1-regularised linear) regression, fitted on the training split and
# then used to predict the targets of the test features.
mod = Lasso(
    alpha=0.1,
    tol=0.01,
    random_state=0,
)
mod.fit(X_train, y_train)
y_pred = mod.predict(X_test)
4.普通最小二乘(OLS)线性回归
import statsmodels.api as sm

# Ordinary least squares regression. add_constant supplies the intercept
# column of the design matrix, for fitting and for prediction alike.
train_design = sm.add_constant(X_train)
model = sm.OLS(y_train, train_design)
results = model.fit()
# Inspect the P>|t| column of the summary table; features whose value is
# greater than 0.05 are candidates to drop.
# NOTE: outside an interactive session the table has to be printed to be seen.
results.summary()
test_design = sm.add_constant(X_test)
y_pred = results.predict(test_design)
5.高斯朴素贝叶斯
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with fixed, equal prior probabilities for the two
# classes (the priors are given explicitly rather than estimated from the
# training labels).
# The *_new feature matrices are presumably a transformed or reduced feature
# set -- confirm against the preprocessing step. The labels are the same
# y_train_bal used by the other classifiers.
clf = GaussianNB(priors=[0.5, 0.5])
clf.fit(X_train_bal_new, y_train_bal)
y_pred = clf.predict(X_test_new)