# Load the stop-word list: one entry per line, with surrounding whitespace
# (including the trailing newline) stripped. A blank line in the file ends up
# as an empty-string entry, exactly as before.
# The `with` block guarantees the file is closed even if reading or decoding
# raises part-way through.
with open('F:\\PyCharm\\newsProject\\stopword\\stopword.txt', encoding = 'UTF-8') as fr:
    remove_words = {line.strip() for line in fr}
# Keep every segmented token that is not a stop word, preserving the
# original token order, and append the survivors to object_list.
object_list.extend(
    token for token in seg_list_exact if token not in remove_words
)
# Count word frequencies and load them into the database
# Tally how many times each remaining token occurs.
word_counts = collections.Counter()
word_counts.update(object_list)
# Take the 100 most frequent (word, count) pairs, most frequent first.
# NOTE(review): the variable name says "top10" but the limit is 100 -- confirm
# which one is intended.
word_counts_top10 = word_counts.most_common(100)
# Print the selection for a quick sanity check.
print(word_counts_top10)
# Persist the selected (word, count) pairs into the result_game table.
# `course` is the cursor-like object returned by get_conn() (it is what the
# statements are executed on); the name is kept as-is for compatibility.
conn,course= get_conn()
try:
    # The statement is the same for every row, so build it once and let the
    # driver bind each (name, values_data) pair as parameters.
    sql="insert into result_game (name,values_data) values(%s,%s)"
    course.executemany(sql,word_counts_top10)
    conn.commit()
finally:
    # Always release the connection and cursor, even if the insert or the
    # commit raises; previously a failure here left them open.
    close_conn(conn,course)