机器学习案例.(数据、文字、图像)

案例一:数据预测. 

数据:

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, AdaBoostRegressor, VotingClassifier, BaggingRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import r2_score

from sklearn.neighbors import KNeighborsRegressor


import pandas as pd
import numpy as np

# Load the training and test sets from Excel workbooks.
train = pd.read_excel("DATABASE.xlsx")
test = pd.read_excel("testdatabase.xlsx")

# First five columns are features, the sixth column is the target.
x_train = train.iloc[:, :5]
y_train = train.iloc[:, 5]   # training set

x_test = test.iloc[:, :5]
y_test = test.iloc[:, 5]     # test set

# Feature engineering: min-max normalisation.
# BUG FIX: the test set must be scaled with the statistics of the
# TRAINING set.  Scaling it with its own min/max (as the original did)
# puts the two sets on different scales, so distances computed by KNN
# are not comparable between fit and predict.
train_min = x_train.min()
train_max = x_train.max()

new_x_train = (x_train - train_min) / (train_max - train_min)
new_x_test = (x_test - train_min) / (train_max - train_min)

new_x_train.info()
print(new_x_test.describe())

# K-nearest-neighbours regression with k = 8 on the scaled features.
knn = KNeighborsRegressor(8)
knn.fit(new_x_train, y_train)
y_pred = knn.predict(new_x_test)
score = knn.score(new_x_test, y_test)  # R^2 on the test set
print(score)


#
# a=pd.DataFrame([[0.0010256,475,200,1.5,0]],columns=["ua_values","ta_values","tb_values","n1_values","n2_values"])
# a.columns=a.columns.astype(str)
# new=(a-train_min)/(train_max-test_min)
# print(a)
#
# 总结=knn.predict(new)
# print(总结)
# Ordinary least-squares baseline (the fitted `model` is reused by the
# voting ensemble further down).
model = LinearRegression()
model.fit(new_x_train, y_train)

# Predict on the held-out set and report the R^2 score.
linear_pred = model.predict(new_x_test)
linear_r2 = model.score(new_x_test, y_test)
print(linear_r2)


# Random forest with 200 trees (also reused by the later ensembles).
random_forest = RandomForestRegressor(n_estimators=200)
random_forest.fit(new_x_train, y_train)
print(random_forest.score(new_x_test, y_test))


# Ridge regression: L2-regularised linear model with default alpha.
ridge = Ridge()
ridge.fit(new_x_train, y_train)
ridge_pred = ridge.predict(new_x_test)
print(r2_score(y_test, ridge_pred))

# Average the predictions of the three already-fitted base regressors.
vote = VotingRegressor(
    estimators=[("knn", knn), ("lr", model), ("rd", random_forest)],
)
vote.fit(new_x_train, y_train)
voting_pred = vote.predict(new_x_test)
print(r2_score(y_test, voting_pred))


# Bagging ensemble: 10 bootstrap copies of the random forest.
bagging = BaggingRegressor(random_forest, n_estimators=10, random_state=42)
bagging.fit(new_x_train, y_train)
bagging_pred = bagging.predict(new_x_test)
print(r2_score(y_test, bagging_pred))

# AdaBoost with the random forest as its base estimator.
booster = AdaBoostRegressor(estimator=random_forest, n_estimators=10)
booster.fit(new_x_train, y_train)
boost_pred = booster.predict(new_x_test)
print(r2_score(y_test, boost_pred))

结果:

案例二:文本分类

数据:

# coding=utf-8
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, BaggingClassifier, GradientBoostingClassifier, \
    StackingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score, accuracy_score
from sklearn.naive_bayes import GaussianNB, MultinomialNB
import pandas as pd


from sklearn.tree import DecisionTreeClassifier


# Load the SMS data sets.
train = pd.read_csv("train.csv", encoding="utf-8")

# Drop rows whose label is missing so the classifiers never see a NaN
# target.  FIX: `subset=("Label")` is just the string "Label" — the
# parentheses do not make a tuple; pass an explicit list instead.
train = train.dropna(subset=["Label"])
test = pd.read_csv("test.csv")

x_train = train["Message_body"]
y_train = train["Label"]

x_test = test["Message_body"]
y_test = test["Label"]

# Normalise case before vectorising.
x_train = x_train.str.lower()
x_test = x_test.str.lower()

# TF-IDF bag-of-words features: the vectoriser is fitted on the
# training corpus only and then reused to transform the test corpus.
vector = TfidfVectorizer()
new_x_train = vector.fit_transform(x_train)
new_x_test = vector.transform(x_test)
print(new_x_train)


# --- Individual classifiers, each scored by test-set accuracy ---

# Multinomial naive Bayes works well on TF-IDF term-weight features.
nb_model = MultinomialNB()
nb_model.fit(new_x_train, y_train)
print("NB", accuracy_score(y_test, nb_model.predict(new_x_test)))

# Logistic regression.
log_reg = LogisticRegression()
log_reg.fit(new_x_train, y_train)
print("logist", accuracy_score(y_test, log_reg.predict(new_x_test)))

# Single decision tree (reused later by the voting/stacking ensembles).
decision = DecisionTreeClassifier()
decision.fit(new_x_train, y_train)
print("deciosin", accuracy_score(y_test, decision.predict(new_x_test)))

# Random forest.
forest = RandomForestClassifier()
forest.fit(new_x_train, y_train)
print("random", accuracy_score(y_test, forest.predict(new_x_test)))



# --- Ensemble models built on top of the classifiers above ---

# Gradient boosting.
gbt = GradientBoostingClassifier()
gbt.fit(new_x_train, y_train)
print("grandient", accuracy_score(y_test, gbt.predict(new_x_test)))

# Bagging over 10 boosted models with a fixed seed.
bagged_gbt = BaggingClassifier(gbt, n_estimators=10, random_state=42)
bagged_gbt.fit(new_x_train, y_train)
print("bagging", accuracy_score(y_test, bagged_gbt.predict(new_x_test)))

# Hard (majority) voting over the tree, the bagged booster and the booster.
voter = VotingClassifier(
    estimators=[("decision", decision), ("random", bagged_gbt), ("b", gbt)],
    voting="hard",
)
voter.fit(new_x_train, y_train)
print("vote", accuracy_score(y_test, voter.predict(new_x_test)))

# Stacking with the voting classifier as the meta-estimator
# (`stacking` is reused below for the single-sentence demo).
stacking = StackingClassifier(
    [("decision", decision), ("random", bagged_gbt), ("b", gbt)],
    final_estimator=voter,
)
stacking.fit(new_x_train, y_train)
print("stack", accuracy_score(y_test, stacking.predict(new_x_test)))


# Classify one new message with the stacked model.
sentence = "what are you doing man?"
arr = pd.Series([sentence], dtype="object")
print(arr)
# BUG FIX: the original passed `test` (the raw test DataFrame) to
# vector.transform, which would vectorise the frame's column NAMES,
# not the sentence.  Transform the prepared Series instead, lower-cased
# to match the preprocessing applied to the training text.
new_sentence = vector.transform(arr.str.lower())
y_pred = stacking.predict(new_sentence)
print(y_pred)



结果:

案例三:图像(手写数字识别)

数据:

# coding=utf-8
import numpy as np

from matplotlib import pyplot as plt



# Load one sample bitmap and display it to sanity-check the data set.
sample = plt.imread('./手写数字识别/0/0_1.bmp')
plt.imshow(sample, cmap='gray')
plt.show()


# 批量导入5000个图片数据
data = [] # 分类模型输入数据
target = [] # 分类模型输出数据

for i in range(10):
    for j in range(1,501):
        img = plt.imread(f'./手写数字识别/{i}/{i}_{j}.bmp')
        data.append(img)
        target.append(i)
# 此时data和target作为列表数据运算起来非常耗内存,所以先转为数组形式的数据方便处理,然后再改变维度
data = np.array(data).reshape(5000, -1)
target = np.array(target).reshape(5000, -1)
print('data的形状:',data.shape,'target的形状:',target.shape)

# 数据划分为训练集和测试集
# Split off 20% of the samples as a held-out test set.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

# K-nearest-neighbours classifier with the default number of neighbours.
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.fit(x_train, y_train)

# Accuracy on the held-out set, then per-sample predictions for plotting.
print(knn.score(x_test, y_test))
y_pred = knn.predict(x_test)
# Randomly pick 15 test samples and plot them with real vs. predicted labels.
# BUG FIX: draw indices over the whole test set (0 .. len(x_test)-1).
# The original used randint(1, 1000, 15), which could never select
# index 0 and silently assumed exactly 1000 test samples.
choice = np.random.randint(0, len(x_test), 15).tolist()
# Large canvas: 3 rows x 5 columns of sub-plots.
plt.figure(figsize=(5 * 10, 3 * 10))

for i in range(15):
    ax = plt.subplot(3, 5, i + 1)
    # Restore each flattened row to its 28-pixel-wide image form.
    ax.imshow(x_test[choice[i]].reshape(28, -1), cmap='gray')
    # Title shows true and predicted digit; red marks a misprediction.
    ax.set_title(
        f'real:{y_test[choice[i]]},\npredict:{y_pred[choice[i]]}',
        fontsize=40,
        color='k' if y_test[choice[i]] == y_pred[choice[i]] else 'r',
    )
plt.show()

结果: 

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值