"""Train and evaluate a logistic-regression classifier on the Iris data set.

The script reads the Iris CSV, splits it 70/30 into train and test sets,
standardizes the four numeric features, fits a ``LogisticRegression`` model,
prints the test labels, the accuracy and a classification report, and finally
prints the predicted class for one hand-written sample.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report
# `sklearn.cross_validation` was removed from scikit-learn; the function now
# lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

df = pd.read_csv(
    'D:\\DEV_DATA\\AI\\Iris\\iris_origin.csv',
    names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'],
)

# Optional binary variant: treat "setosa" as the positive class and every
# other species as the negative class.
# df['class'] = df['class'].apply(lambda v: 1 if v == 'setosa' else 0)

# `.ix` was removed from pandas; `.iloc` performs the same positional
# selection: the first four columns are features, the fifth is the label.
x = df.iloc[:, :4]
y = df.iloc[:, 4]

# Feature scaling (standardization). The scaler is fitted on the training
# split only, so no statistics from the test split leak into training.
sc = StandardScaler()
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.3)
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, Y_train)

print(Y_test)
predict = lr.predict(X_test_std)
print(lr.score(X_test_std, Y_test))
print(classification_report(Y_test, predict))
print('---------------------')

# Classify a single hand-written sample. It is wrapped in a DataFrame carrying
# the training column names so the scaler sees the same feature names it was
# fitted with (recent scikit-learn versions warn on a mismatch).
# NOTE: `x` and `y` are deliberately rebound here, as in the original script.
x = sc.transform(pd.DataFrame([[6.5, 2.8, 4.6, 1.5]], columns=X_train.columns))
y = lr.predict(x)
print(y)