# Spam email classification implemented with a CNN
from keras.models import Sequential
from keras.layers import Dense, Conv1D, GlobalMaxPooling1D, Embedding, Dropout, Activation, MaxPooling1D
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras import metrics
import keras.preprocessing.text
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Name of the CSV file that holds the dataset.
data_file = "spam.csv"

# Load the dataset into a DataFrame, decoding the file as latin-1.
df = pd.read_csv(data_file, encoding="latin-1")

# Column v1 provides the labels; column v2 provides the message texts.
labels, texts = df.v1, df.v2
# Preprocessing: split a sentence into a list of words
def text_to_word_sequence(text, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True, split=" "):
if lower:
text = text.lower()
if type(text) == unicode:
translate_table = {ord(c): ord(t) for c, t in zip(filters, split*len(filters))}
else:
tra
文本分类(3)-卷积神经网络(CNN)实现文本分类
最新推荐文章于 2022-08-15 20:57:00 发布