源码下载:
http://download.csdn.net/download/adam_zs/10218906
"""Linear regression: predict how far a car can travel per gallon of fuel."""
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error  # mean squared error metric

# Print frames without truncation. The old 'display.height' option no longer
# exists in pandas (setting it raises OptionError); 'display.max_rows' covers it.
pd.set_option('display.max_rows', 9999)
pd.set_option('display.max_columns', 9999)
pd.set_option('display.width', 9999)

columns = [
    "mpg", "cylinders", "displacement", "horsepower", "weight",
    "acceleration", "model year", "origin", "car name",
]
# The data file is whitespace-delimited and has no header row, so column names
# are supplied explicitly. sep=r"\s+" replaces the deprecated
# delim_whitespace=True flag.
cars = pd.read_table("auto-mpg.data", sep=r"\s+", names=columns)
# Sample of the loaded frame (from the original notes):
#     mpg  cylinders  displacement  horsepower  weight  acceleration  model year  origin  car name
# 0  18.0  8  307.0  130.0  3504.0  12.0  70  1  chevrolet chevelle malibu
# 1  15.0  8  350.0  165.0  3693.0  11.5  70  1  buick skylark 320
# 2  18.0  8  318.0  150.0  3436.0  11.0  70  1  plymouth satellite
# 3  16.0  8  304.0  150.0  3433.0  12.0  70  1  amc rebel sst
# 4  17.0  8  302.0  140.0  3449.0  10.5  70  1  ford torino

# Optional exploration: scatter mpg against weight and against acceleration.
# fig = plt.figure()
# ax1 = fig.add_subplot(211)
# ax2 = fig.add_subplot(212)
# cars.plot("weight", "mpg", kind="scatter", ax=ax1)
# cars.plot("acceleration", "mpg", kind="scatter", ax=ax2)
# plt.show()

# Fit mpg as a linear function of weight, then predict on the same rows.
lr = LinearRegression()
lr.fit(cars[['weight']], cars['mpg'])
predictions = lr.predict(cars[['weight']])

# Actual values in blue, fitted values in red. Matplotlib's single-letter
# color codes must be lowercase; uppercase 'B'/'R' are rejected.
plt.scatter(cars['weight'], cars['mpg'], c='b')
plt.scatter(cars['weight'], predictions, c='r')
plt.show()

# Error of the fit measured on the training data itself.
print(mean_squared_error(cars["mpg"], predictions))
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

# Logistic regression used for classification, compared with a plain linear fit.
# School-admissions data.
admissions = pd.read_csv("admissions.csv")
# Sample of the loaded frame (from the original notes):
#    admit       gpa         gre
# 0      0  3.177277  594.102992
# 1      0  3.412655  631.528607
# 2      0  2.728097  553.714399
# 3      0  3.093559  551.089985
# 4      0  3.141923  537.184894

# Linear model fitted directly on the admit label, for comparison.
linear_regression = LinearRegression()
linear_regression.fit(admissions[["gpa"]], admissions["admit"])
linear_predict = linear_regression.predict(admissions[["gpa"]])

logistic_regression = LogisticRegression()
logistic_regression.fit(admissions[["gpa"]], admissions["admit"])
# predict() yields the concrete class label for each row.
logistic_predict = logistic_regression.predict(admissions[["gpa"]])
# predict_proba() yields the per-class probabilities for each row.
logistic_predict_proba = logistic_regression.predict_proba(admissions[["gpa"]])

# Red: linear output, blue: logistic labels, green: probability in column 1
# (presumably the admit == 1 class, assuming labels are 0/1 - confirm).
# Matplotlib's single-letter color codes must be lowercase; the uppercase
# forms "R"/"B"/"G" are rejected.
plt.scatter(admissions["gpa"], linear_predict, c="r")
plt.scatter(admissions["gpa"], logistic_predict, c="b")
plt.scatter(admissions["gpa"], logistic_predict_proba[:, 1], c="g")
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# Training-set accuracy of a logistic regression that uses GPA only.
admissions = pd.read_csv("admissions.csv")
# Sample of the loaded frame (from the original notes):
#    admit       gpa         gre
# 0      0  3.177277  594.102992
# 1      0  3.412655  631.528607
# 2      0  2.728097  553.714399
# 3      0  3.093559  551.089985
# 4      0  3.141923  537.184894

# Single-column feature frame shared by fit and predict.
gpa_features = admissions[["gpa"]]

lr = LogisticRegression()
lr.fit(gpa_features, admissions["admit"])
admissions["predicted_label"] = lr.predict(gpa_features)
# print(admissions["predicted_label"].value_counts())

# Accuracy = number of correct predictions / total number of rows.
matches = admissions["predicted_label"].eq(admissions["admit"])
admissions_correct = admissions.loc[matches]
# print(len(admissions_correct) / len(admissions))
# Cross-validation
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

admissions = pd.read_csv("admissions.csv")
# Sample of the loaded frame (from the original notes):
#    admit       gpa         gre
# 0      0  3.177277  594.102992
# 1      0  3.412655  631.528607
# 2      0  2.728097  553.714399
# 3      0  3.093559  551.089985
# 4      0  3.141923  537.184894

# Five shuffled folds with a fixed seed so the split is reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=10)
lr = LogisticRegression()

# One score per fold, using GPA as the only feature.
accuracies = cross_val_score(
    lr,
    X=admissions[['gpa']],
    y=admissions['admit'],
    cv=kf,
)
fold_count = len(accuracies)
average_accuracy = sum(accuracies) / fold_count

print(accuracies)
print(average_accuracy)
import pandas as pd
import matplotlib.pyplot as plt

# Multi-class problem: prepare one-hot (indicator) features.

# Print frames without truncation. The old 'display.height' option no longer
# exists in pandas (setting it raises OptionError); 'display.max_rows' covers it.
pd.set_option('display.max_rows', 9999)
pd.set_option('display.max_columns', 9999)
pd.set_option('display.width', 9999)

columns = ["mpg", "cylinders", "displacement", "horsepower", "weight", "acceleration", "year", "origin", "car name"]
# Whitespace-delimited file without a header row; sep=r"\s+" replaces the
# deprecated delim_whitespace=True flag.
cars = pd.read_table("auto-mpg.data", sep=r"\s+", names=columns)
# Sample of the loaded frame (from the original notes):
#     mpg  cylinders  displacement  horsepower  weight  acceleration  year  origin  car name
# 0  18.0  8  307.0  130.0  3504.0  12.0  70  1  chevrolet chevelle malibu
# 1  15.0  8  350.0  165.0  3693.0  11.5  70  1  buick skylark 320
# 2  18.0  8  318.0  150.0  3436.0  11.0  70  1  plymouth satellite
# 3  16.0  8  304.0  150.0  3433.0  12.0  70  1  amc rebel sst

# get_dummies expands each distinct cylinder count / year into its own
# indicator column (prefixed 'cyl_' / 'year_').
dummy_cylinders = pd.get_dummies(cars['cylinders'], prefix='cyl')
dummy_years = pd.get_dummies(cars['year'], prefix='year')

# Replace the original columns with their indicator columns, appended in the
# order cylinders first, then years.
cars = cars.drop(columns=["cylinders", "year"])
cars = pd.concat([cars, dummy_cylinders, dummy_years], axis=1)
print(cars.head())