模型训练部分代码
# -*- coding: utf-8 -*- from __future__ import division, print_function, absolute_import import tflearn import os import numpy import jieba import sys import random import re import fire from sys import argv import json from tflearn.data_utils import to_categorical, pad_sequences from tflearn.datasets import imdb def load_data1( keywordPath,stopwords_set,filepath,dictfilepath,n_words, valid_portion=0.1, sort_by_len=True): #keywordPath = sys.argv[1] jieba.load_userdict(keywordPath) pathDir = os.listdir(filepath) data_set = [] train_set_x = [] train_set_y = [] test_set_x = [] test_set_y = [] # 把停用词做成字典 stopwords = {} fstop = open(stopwords_set, 'rb') for eachWord in fstop: stopwords[eachWord.strip().decode('utf-8', 'ignore')] = eachWord.strip().decode('utf-8', 'ignore') fstop.close() #写入词典 f1 = open(dictfilepath, 'w', encoding='UTF-8') dic = dict() i = 0 j = 0