# 线性回归模型,用来预测
# KNN(K近邻,分类算法),用来归类(监督学习)
# K-Means算法是无监督的聚类算法
# ID3(决策树算法),用来决策
# 朴素贝叶斯,根据关键词对文章内容进行分类
# 支持向量机(SVM)算法
# encoding=utf8
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits, fetch_20newsgroups, make_blobs
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
#回归模型,用来预测
#w1*附近商业区 + w2*附近学校 + w3*附近居民数量 = 8千
def price_predict():
#http://scikit-learn.org/stable/modules/linear_model.html
#数据有三个特征:距离地铁距离、附近小学数量、小区绿化率
# P = W1 * A + W2*B + W3 * C
X = np.array([[500.0, 3.0, 0.3], [1000.0, 1.0, 0.6],
[750.0, 2.0, 0.3], [600.0, 5.0, 0.2], [1200.0, 1.0, 0.6]])
#具有三个特征的房屋对应的房价
Y = np.array([10000., 9000., 8000., 12000., 8500.])
#将数据转化到均值是0,方差是1的标准分布内
std_x = StandardScaler()
std_x.fit(X)
x_train = std_x.transform(X)
std_y = StandardScaler()
y_train = std_y.fit_transform(Y.reshape(-1,1))
#构建线性预测模型
lr = LinearRegression()
#模型在历史数据上进行训练,Y.reshape(-1,1)将Y变为二维数组,fit函数要求二维数组
lr.fit(x_train,y_train)
#使用训练模型预测新房屋[1300,3.0,0.4]的价格
x_predict = std_x.transform(np.array([[1300,3.0,0.4]]))