# California housing price prediction -- no feature engineering or data
# preprocessing: a plain linear regression on the raw columns.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Download the housing table and keep a local copy one directory up.
# NOTE(review): to_csv() with default arguments also writes the DataFrame
# index as an extra unnamed first column; pass index=False if the copy is
# meant to be read back as-is -- confirm with whoever consumes ../house.csv.
df_housing = pd.read_csv("https://raw.githubusercontent.com/huangjia2019/house/master/house.csv")
df_housing.to_csv("../house.csv")

# Features are every column except the target; the target is the median
# house value.
x = df_housing.drop("median_house_value", axis=1)
y = df_housing.median_house_value

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Fit ordinary least squares on the training split and predict the test split.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('房价的真值(测试值):')
print(y_test.values)
print('预测的房价(测试值):')
print(y_pred)
# LinearRegression.score() reports the R^2 coefficient of determination.
print("给预测评分:", model.score(X_test, y_test))

# Compare true and predicted values against median income.  The test rows
# are in shuffled order, so the predictions are drawn as points: joining
# them with a line would zigzag back and forth across the figure.
plt.scatter(X_test.median_income, y_test, color='brown', label="True value")
plt.scatter(X_test.median_income, y_pred, color='green', s=4, label="Predicted value")
plt.xlabel("Median Income")
plt.ylabel("Median House Value")
plt.legend()
plt.show()
# Keras built-in MNIST dataset -- grayscale handwritten-digit image
# classification with a small convolutional network.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab
from keras import models
from keras.datasets import mnist
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.utils import to_categorical

NUM_CLASSES = 10            # digits 0-9
INPUT_SHAPE = (28, 28, 1)   # height, width, single grayscale channel

# Load the images and their integer labels.
(X_train_image, y_train_lable), (X_test_image, y_test_lable) = mnist.load_data()
print("数据集张量形状:", X_train_image.shape)
print("第一个数据样本:\n", X_train_image[0])
print("第一个数据样本的标签:", y_train_lable[0])

# Add the trailing channel axis expected by Conv2D (-1 lets NumPy infer the
# sample count instead of hard-coding 60000 / 10000) and scale the 0-255
# pixel intensities to [0, 1] so the network trains on well-conditioned inputs.
X_train = X_train_image.reshape(-1, *INPUT_SHAPE).astype("float32") / 255
X_test = X_test_image.reshape(-1, *INPUT_SHAPE).astype("float32") / 255

# One-hot encode the labels to match the softmax output layer.
y_train = to_categorical(y_train_lable, NUM_CLASSES)
y_test = to_categorical(y_test_lable, NUM_CLASSES)
print("训练集张量形状:", X_train.shape)
print("第一个数据标签:", y_train[0])

# Two conv + max-pool stages, then a dense classifier head; dropout is
# applied after the second pooling stage and before the output layer.
model = models.Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=INPUT_SHAPE))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='softmax'))

model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 5 epochs, holding out 30% of the training data for validation.
model.fit(
    X_train,
    y_train,
    validation_split=0.3,
    epochs=5,
    batch_size=128,
)

# evaluate() returns [loss, accuracy] because 'accuracy' is the only metric.
score = model.evaluate(X_test, y_test)
print("测试集预测准确率:", score[1])

# NOTE: when the dataset is small or the model's performance is unstable,
# K-fold validation is the usual remedy: apart from the held-out independent
# test set, split the data into K equally sized partitions; for each
# partition, train the model on the remaining K-1 partitions and evaluate it
# on the partition that was left out.

# Predict the first test image (sliced as a batch of one) and report the
# most probable class.
pred = model.predict(X_test[:1])
print(pred[0], "转换一下格式得到:", pred.argmax())

# Show the image that was just classified.
plt.imshow(X_test[0].reshape(28, 28), cmap='Greys')
pylab.show()