# 训练 (Training)
import jieba
import numpy as np
import pandas as pd
from gensim.models.word2vec import Word2Vec
import joblib
from sklearn.svm import SVC
# Load the two labelled corpora. pandas parses each file without a header row,
# and only column 0 is used below as the sentence text.
# NOTE(review): read_csv splits on ASCII commas by default -- presumably each
# file holds one comma-free sentence per line; confirm against the data.
pos = pd.read_csv('weather_pos.txt', header=None, encoding='UTF-8')
neg = pd.read_csv('weather_neg.txt', header=None, encoding='UTF-8')

# Segment every sentence with jieba; each 'words' cell becomes a token list.
pos['words'] = pos[0].apply(jieba.lcut)
neg['words'] = neg[0].apply(jieba.lcut)

# Positives come first, then negatives, so samples and labels stay aligned:
# label 1.0 for every row of `pos`, 0.0 for every row of `neg`.
x = np.concatenate([pos['words'], neg['words']])
y = np.repeat([1.0, 0.0], [len(pos), len(neg)])

# Train the embeddings on the tokenised sentences.
# NOTE(review): `size` and `iter` are the gensim 3.x keyword names; gensim 4.0
# renamed them to `vector_size` and `epochs`. Confirm the pinned version.
word2vec = Word2Vec(
    x,
    size=128,      # embedding dimension; total_vector() hard-codes the same 128
    window=3,      # context window
    min_count=5,   # ignore tokens seen fewer than 5 times
    sg=1,          # skip-gram
    hs=1,          # hierarchical softmax
    iter=10,       # training epochs
    workers=25,    # worker threads
)
word2vec.save('word2vec.model')
def total_vector(words, w2v=None):
    """Sum the Word2Vec embeddings of ``words`` into one sentence vector.

    Args:
        words: Iterable of tokens (e.g. the list returned by ``jieba.lcut``).
        w2v: Optional model exposing ``.wv[token]`` lookups. When omitted,
            the module-level ``word2vec`` model is used.

    Returns:
        A ``(1, 128)`` float array holding the element-wise sum of the
        embeddings of every in-vocabulary token. Tokens missing from the
        vocabulary are skipped, so the result is all zeros when no token is
        known (including for empty input).
    """
    vectors = (word2vec if w2v is None else w2v).wv
    vec = np.zeros((1, 128))
    for word in words:
        try:
            embedding = vectors[word]
        except KeyError:
            # Out-of-vocabulary token: it contributes nothing to the sum.
            continue
        vec += np.reshape(embedding, (1, 128))
    return vec
# Reduce each tokenised sentence to its (1, 128) summed embedding and stack the
# rows into one feature matrix whose row order matches the labels in `y`.
train_vec = np.concatenate(list(map(total_vector, x)), axis=0)

# Fit an RBF-kernel SVM on the sentence vectors; fit() returns the estimator
# itself, so the fitted classifier is bound to `model` in one step.
model = SVC(verbose=True, kernel='rbf').fit(train_vec, y)

# Persist the fitted classifier for the inference script.
joblib.dump(model, 'weather_svm.pkl')
# 测试 (Testing / inference)
import jieba
import numpy as np
from gensim.models.word2vec import Word2Vec
import joblib
# Load the Word2Vec embeddings from 'word2vec.model'; total_vector() below
# looks tokens up in this model.
word2vec = Word2Vec.load('word2vec.model')
# Load the serialized classifier used by svm_predict(). joblib.load unpickles
# the file, so only load model files from a trusted source.
model = joblib.load('weather_svm.pkl')
def total_vector(words, w2v=None):
    """Sum the Word2Vec embeddings of ``words`` into one sentence vector.

    Args:
        words: Iterable of tokens (e.g. the list returned by ``jieba.lcut``).
        w2v: Optional model exposing ``.wv[token]`` lookups. When omitted,
            the module-level ``word2vec`` model is used.

    Returns:
        A ``(1, 128)`` float array holding the element-wise sum of the
        embeddings of every in-vocabulary token. Tokens missing from the
        vocabulary are skipped, so the result is all zeros when no token is
        known (including for empty input).
    """
    vectors = (word2vec if w2v is None else w2v).wv
    vec = np.zeros((1, 128))
    for word in words:
        try:
            embedding = vectors[word]
        except KeyError:
            # Out-of-vocabulary token: it contributes nothing to the sum.
            continue
        vec += np.reshape(embedding, (1, 128))
    return vec
def svm_predict(query):
    """Classify ``query`` as weather-related or not and print the label.

    The query is converted to ``str``, segmented with jieba, reduced to a
    summed Word2Vec vector by ``total_vector`` and passed to the module-level
    SVM ``model``.

    Args:
        query: The user's input; any object is accepted because it is passed
            through ``str`` before segmentation.

    Returns:
        None. Prints the "weather" label when the predicted class is 1 and
        the "other" label when it is 0; any other class prints nothing.
    """
    words = jieba.lcut(str(query))
    words_vec = total_vector(words)
    # predict() returns one label per input row, and exactly one row is passed
    # in. Index that element explicitly: calling int() on the array itself is
    # deprecated for ndim > 0 arrays since NumPy 1.25.
    label = int(model.predict(words_vec)[0])
    if label == 1:
        print('类别:天气')
    elif label == 0:
        print('类别:其他')
# Interactive loop: read one query per prompt and print its predicted class.
while True:
    try:
        str_ = input('请输入:')
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D, Ctrl-C or exhausted stdin at the prompt: leave the loop
        # cleanly instead of terminating with a traceback.
        break
    svm_predict(str_)
# 结果 (Results)