Python 对文本汇总产生的文件内容进行可视化 (加粗汇总内容)

#!/usr/bin/python 
# -*- coding: utf-8 -*-

'''
Created on 2015-1-25
@author: beyondzhou
@name: visualize_document_summary.py
'''

import os
import json
from IPython.display import IFrame
from IPython.core.display import display
from summary import summarize

# Download nltk packages used in this example
#nltk.download('stopwords')

# Read the blog feed. The loop below treats it as a list of posts and uses
# each post's 'title' and 'content' keys.
BLOG_DATA = r"E:\eclipse\Web\dFile\feed.json"
with open(BLOG_DATA) as feed_file:
    blog_data = json.load(feed_file)

# Directory that receives the generated HTML files (loop-invariant, so it
# is built once here instead of once per written file).
OUTPUT_DIR = os.path.join(r"E:", "\\", "eclipse", "Web", "dfile")

# Characters that are reserved in Windows file names. Post titles are used
# as file names and may contain them (e.g. "Four short links: 23 January
# 2015"), so they are stripped before the path is built.
FORBIDDEN_FILENAME_CHARS = '\\/:*?"<>|'

HTML_TEMPLATE = """<html>
    <head>
        <title>%s</title>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
    </head>
    <body>%s</body>
</html>"""

# Keys that summarize() is expected to add to each post; each one is
# iterated below as a collection of sentences taken from the post content.
SUMMARY_TYPES = ('top_n_summary', 'mean_scored_summary')

# Path of the most recently written file; stays None if the feed is empty.
last_written = None

for post in blog_data:

    # Merge the summaries produced by the imported summarize() into the post.
    post.update(summarize(post['content']))

    safe_title = ''.join(ch for ch in post['title']
                         if ch not in FORBIDDEN_FILENAME_CHARS)

    # Store a version of the full post with the summary sentences marked up
    # in bold, using simple string replacement.
    for summary_type in SUMMARY_TYPES:
        marked_up = '<p>%s</p>' % (post['content'], )
        for sentence in post[summary_type]:
            marked_up = marked_up.replace(
                sentence, '<strong>%s</strong>' % (sentence, ))
        post[summary_type + '_marked_up'] = marked_up

        filename = safe_title + '.summary.' + summary_type + '.html'
        out_path = os.path.join(OUTPUT_DIR, filename)
        html = HTML_TEMPLATE % (post['title'] + ' Summary', marked_up)

        # The document is encoded explicitly, so write it through a binary
        # handle; the context manager closes the file even if write() fails.
        with open(out_path, 'wb') as out_file:
            out_file.write(html.encode('utf-8'))
        last_written = out_path

        print("Data written to %s" % out_path)

# Display the last file processed in an inline frame. Skipped when the feed
# contained no posts, in which case nothing was written.
if last_written is not None:
    print("Displaying %s:" % last_written)
    display(IFrame('files/%s' % last_written, '100%', '600px'))

Data written to E:\eclipse\Web\dfile\Four short links: 23 January 2015.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 23 January 2015.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Designing on a system level.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Designing on a system level.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Bitcoin is just the first app to use blockchain technology.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Bitcoin is just the first app to use blockchain technology.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Blockchain scalability.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Blockchain scalability.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Bringing an end to synthetic biology’s semantic debate.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Bringing an end to synthetic biology’s semantic debate.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Building and deploying large-scale machine learning pipelines.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Building and deploying large-scale machine learning pipelines.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 22 January 2015.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 22 January 2015.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\How to make a UX designer.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\How to make a UX designer.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\The 3Ps of the blockchain: platforms, programs and protocols.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\The 3Ps of the blockchain: platforms, programs and protocols.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 21 January 2015.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 21 January 2015.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\The Internet of Things is really about software.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\The Internet of Things is really about software.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\What containers can do for you.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\What containers can do for you.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 20 January 2015.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 20 January 2015.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Striking parallels between mathematics and software engineering.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Striking parallels between mathematics and software engineering.summary.mean_scored_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 19 January 2015.summary.top_n_summary.html
Data written to E:\eclipse\Web\dfile\Four short links: 19 January 2015.summary.mean_scored_summary.html
Displaying E:\eclipse\Web\dfile\Four short links: 19 January 2015.summary.mean_scored_summary.html
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值