import numpy as np
# Shuffle the rows of `cars` and split them 70% / 30% into train and test sets.
# Permute row *positions* (0..n-1) rather than index labels: `.iloc` is
# position-based, so feeding it labels taken from `cars.index` would select the
# wrong rows (or raise) whenever the index is not the default 0..n-1 range.
shuffle_row = np.random.permutation(cars.shape[0])
shuffle_car = cars.iloc[shuffle_row]
# Number of rows that go to the training set (70%, truncated to an int).
high_train_row = int(cars.shape[0] * .70)
train = shuffle_car.iloc[:high_train_row]
test = shuffle_car.iloc[high_train_row:]
# Collect the distinct values of the "origin" column.
unique_origin = cars["origin"].unique()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(unique_origin)
# Sort in place so the origin values are iterated in ascending order.
unique_origin.sort()
# Fitted models, keyed by origin value.
models = {}
# Feature columns: every training column whose name starts with "cyl" or "year".
feature = [c for c in train.columns if c.startswith(("cyl", "year"))]
print(feature)
from sklearn.linear_model import LogisticRegression
# Train one binary logistic regression per origin value (one-vs-rest).
# The feature matrix is the same for every model, so build it once.
x_train = train[feature]
for origin in unique_origin:
    model1 = LogisticRegression()
    # Rows whose origin equals the current value are the positive examples.
    y_train = train["origin"] == origin
    model1.fit(x_train, y_train)
    # Keep the fitted model, keyed by the origin value it was trained for.
    models[origin] = model1
# Build a table with one column per origin value holding each model's
# predicted probability for the test rows.
# NOTE(review): `pd` is not imported in the visible part of this file —
# confirm pandas is imported as `pd` before this point.
testing_probs = pd.DataFrame(columns=unique_origin)
print(testing_probs)
# The test feature matrix is the same for every model, so build it once.
X_test = test[feature]
for origin in unique_origin:
    # Column 1 of predict_proba corresponds to the second entry of the model's
    # classes_ ([False, True] for a boolean target), i.e. the positive class.
    testing_probs[origin] = models[origin].predict_proba(X_test)[:, 1]
print(testing_probs.head(3))