import pickle
from textrank4zh import TextRank4Keyword, TextRank4Sentence
import os
from snownlp import SnowNLP
import jieba
import jieba.analyse
from bosonnlp import BosonNLP
#http://www.cnblogs.com/hellojesson/p/5961570.html
def main():
    """Entry point: run the TextRank-based summarisation demo."""
    # textRank2()  # alternative SnowNLP/BosonNLP pipeline, kept commented out above
    textRankTest1()
#
# def textRank2():
# """"加载50万数据"""
#
# path = "d:\\150w"
# file = open(path, 'rb')
# a = pickle.load(file)
# # print(a)
# # tr4s = TextRank4Sentence()
# ll = []
# i = 0
# for item in a:
# print(item[0])
# print("================标题=============")
# print(item[1])
# rowitem = item[2]
# s = SnowNLP(rowitem)
# print("===================原文===================")
# print(rowitem)
# # tr4s.analyze(text=rowitem, lower=True, source='all_filters')
# # print(rowitem)
# # print("\033[1;31m%s\033[43m" %rowitem)
# ceshi = []
# ceshi.append(rowitem)
# nlp = BosonNLP('lSfW0ZxS.17321.5fBmJSZHbWEv')
# print("==================情感分析====================")
# print(nlp.sentiment(rowitem)) # 情感分析结果分别为 “非负面” 和 “负面” 概率组成的列表。
# print('==================摘要=====================')
# tags_output = jieba.analyse.extract_tags(rowitem, topK=20, withWeight=True)
# print(tags_output)
# # ceshi.append(tags_output)
# ceshi.append(s.summary(3))
# print(s.summary(5))
# ll.append(ceshi)
# i = i + 1
# if i > 100:
# break
# # print(i)
#
# current_dir = os.path.abspath('.')
# file_name2 = os.path.join(current_dir, 'abstract.csv')
# f2 = open(file_name2, 'w+', encoding='utf8')
#
# for item in ll:
# f2.write("\n")
# f2.write("======测试(原文)====")
# f2.write("\n")
# f2.write(str(item[0]))
# f2.write("\n")
# f2.write("======摘要====")
# f2.write("\n")
# f2.write(str(item[1]))
# f2.write("\n")
#
# f2.close()
def textRankTest1():
    """Summarise pickled articles with TextRank and dump the results to a file.

    Loads a pickled list of records from ``d:\\100w`` (each record is a
    sequence where index 0 is printed as an id/title and index 2 is the
    article body -- schema inferred from usage, TODO confirm), extracts the
    top-3 key sentences of the first 101 records, and writes each original
    text plus its first key sentence to ``abstract.csv`` in the current
    working directory.
    """
    path = "d:\\100w"
    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
    # file -- only use with trusted data.
    # Fix: the file handle was previously opened and never closed.
    with open(path, 'rb') as file:
        a = pickle.load(file)
    tr4s = TextRank4Sentence()
    ll = []  # collected [original_text, key_sentence_1, key_sentence_2, ...] records
    i = 0
    for item in a:
        print(item[0])
        rowitem = item[2]
        tr4s.analyze(text=rowitem, lower=True, source='all_filters')
        # Fix: removed a bare string literal ("===原文===") that was a
        # no-op statement, presumably a print() whose call was lost.
        ceshi = []
        ceshi.append(rowitem)
        print()
        print('==================摘要=====================')
        # Fix: use a distinct loop variable so the outer `item` is not shadowed.
        for sent in tr4s.get_key_sentences(num=3):
            # sent.index: sentence position in the text; sent.weight: TextRank score
            print(sent.index, sent.weight, sent.sentence)
            ceshi.append(sent.sentence)
        ll.append(ceshi)
        i = i + 1
        if i > 100:  # cap at 101 records, matching the original behaviour
            break
        print(i)
    file_name2 = os.path.join(os.path.abspath('.'), 'abstract.csv')
    # Fix: `with` guarantees the output file is closed even on an exception.
    with open(file_name2, 'w+', encoding='utf8') as f2:
        for rec in ll:
            # NOTE(review): ANSI colour escape codes are written into the
            # file; they only render in a terminal, not in a CSV viewer --
            # kept byte-for-byte for output compatibility.
            f2.write("\033[0;31m%s\033[0m" % "======测试(原文)====")
            f2.write("\n")
            f2.write(str(rec[0]))
            f2.write("\n")
            f2.write("\033[0;31m%s\033[0m" % "======摘要====")
            f2.write("\n")
            # Only the first of the (up to 3) collected key sentences is
            # written; rec[2:] are unused -- TODO confirm intent. Raises
            # IndexError if no key sentence was found (as in the original).
            f2.write(str(rec[1]))
            f2.write("\n")
# print("\033[0;31m%s\033[0m" % "======测试====")
# NOTE(review): this guard sits mid-file -- the module content is duplicated
# below it, so when run as a script, main() executes here using the
# definitions above, and the duplicate definitions are then re-executed.
if __name__ == '__main__':
    main()
from textrank4zh import TextRank4Keyword, TextRank4Sentence
import os
from snownlp import SnowNLP
import jieba
import jieba.analyse
from bosonnlp import BosonNLP
#http://www.cnblogs.com/hellojesson/p/5961570.html
def main():
    """Entry point: run the TextRank-based summarisation demo."""
    # textRank2()  # alternative SnowNLP/BosonNLP pipeline, kept commented out above
    textRankTest1()
#
# def textRank2():
# """"加载50万数据"""
#
# path = "d:\\150w"
# file = open(path, 'rb')
# a = pickle.load(file)
# # print(a)
# # tr4s = TextRank4Sentence()
# ll = []
# i = 0
# for item in a:
# print(item[0])
# print("================标题=============")
# print(item[1])
# rowitem = item[2]
# s = SnowNLP(rowitem)
# print("===================原文===================")
# print(rowitem)
# # tr4s.analyze(text=rowitem, lower=True, source='all_filters')
# # print(rowitem)
# # print("\033[1;31m%s\033[43m" %rowitem)
# ceshi = []
# ceshi.append(rowitem)
# nlp = BosonNLP('lSfW0ZxS.17321.5fBmJSZHbWEv')
# print("==================情感分析====================")
# print(nlp.sentiment(rowitem)) # 情感分析结果分别为 “非负面” 和 “负面” 概率组成的列表。
# print('==================摘要=====================')
# tags_output = jieba.analyse.extract_tags(rowitem, topK=20, withWeight=True)
# print(tags_output)
# # ceshi.append(tags_output)
# ceshi.append(s.summary(3))
# print(s.summary(5))
# ll.append(ceshi)
# i = i + 1
# if i > 100:
# break
# # print(i)
#
# current_dir = os.path.abspath('.')
# file_name2 = os.path.join(current_dir, 'abstract.csv')
# f2 = open(file_name2, 'w+', encoding='utf8')
#
# for item in ll:
# f2.write("\n")
# f2.write("======测试(原文)====")
# f2.write("\n")
# f2.write(str(item[0]))
# f2.write("\n")
# f2.write("======摘要====")
# f2.write("\n")
# f2.write(str(item[1]))
# f2.write("\n")
#
# f2.close()
def textRankTest1():
    """Summarise pickled articles with TextRank and dump the results to a file.

    Loads a pickled list of records from ``d:\\100w`` (each record is a
    sequence where index 0 is printed as an id/title and index 2 is the
    article body -- schema inferred from usage, TODO confirm), extracts the
    top-3 key sentences of the first 101 records, and writes each original
    text plus its first key sentence to ``abstract.csv`` in the current
    working directory.
    """
    path = "d:\\100w"
    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
    # file -- only use with trusted data.
    # Fix: the file handle was previously opened and never closed.
    with open(path, 'rb') as file:
        a = pickle.load(file)
    tr4s = TextRank4Sentence()
    ll = []  # collected [original_text, key_sentence_1, key_sentence_2, ...] records
    i = 0
    for item in a:
        print(item[0])
        rowitem = item[2]
        tr4s.analyze(text=rowitem, lower=True, source='all_filters')
        # Fix: removed a bare string literal ("===原文===") that was a
        # no-op statement, presumably a print() whose call was lost.
        ceshi = []
        ceshi.append(rowitem)
        print()
        print('==================摘要=====================')
        # Fix: use a distinct loop variable so the outer `item` is not shadowed.
        for sent in tr4s.get_key_sentences(num=3):
            # sent.index: sentence position in the text; sent.weight: TextRank score
            print(sent.index, sent.weight, sent.sentence)
            ceshi.append(sent.sentence)
        ll.append(ceshi)
        i = i + 1
        if i > 100:  # cap at 101 records, matching the original behaviour
            break
        print(i)
    file_name2 = os.path.join(os.path.abspath('.'), 'abstract.csv')
    # Fix: `with` guarantees the output file is closed even on an exception.
    with open(file_name2, 'w+', encoding='utf8') as f2:
        for rec in ll:
            # NOTE(review): ANSI colour escape codes are written into the
            # file; they only render in a terminal, not in a CSV viewer --
            # kept byte-for-byte for output compatibility.
            f2.write("\033[0;31m%s\033[0m" % "======测试(原文)====")
            f2.write("\n")
            f2.write(str(rec[0]))
            f2.write("\n")
            f2.write("\033[0;31m%s\033[0m" % "======摘要====")
            f2.write("\n")
            # Only the first of the (up to 3) collected key sentences is
            # written; rec[2:] are unused -- TODO confirm intent. Raises
            # IndexError if no key sentence was found (as in the original).
            f2.write(str(rec[1]))
            f2.write("\n")
# print("\033[0;31m%s\033[0m" % "======测试====")
# Standard script entry guard: run the demo only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()