#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on 2015-1-25
@author: beyondzhou
@name: visualize_document_summary.py
'''
import os
import json
from IPython.display import IFrame
from IPython.core.display import display
from summary import summarize
# Download nltk packages used in this example
#nltk.download('stopwords')
# Input feed (a JSON list of posts) and the directory that receives the
# generated HTML summary pages.
BLOG_DATA = r"E:\eclipse\Web\dFile\feed.json"
OUTPUT_DIR = r"E:\eclipse\Web\dfile"

# Keys of the summaries that get rendered for every post.
SUMMARY_TYPES = ('top_n_summary', 'mean_scored_summary')

HTML_TEMPLATE = """<html>
<head>
<title>%s</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>%s</body>
</html>"""

# Characters that are not allowed in a Windows file name. Post titles such as
# "Four short links: 23 January 2015" contain ':', which on NTFS addresses an
# alternate data stream instead of naming a regular file.
INVALID_FILENAME_CHARS = '\\/:*?"<>|'


def safe_filename(title):
    """Return *title* with every character from INVALID_FILENAME_CHARS removed."""
    return ''.join(ch for ch in title if ch not in INVALID_FILENAME_CHARS)


def mark_up_sentences(content, sentences):
    """Wrap *content* in ``<p>`` and embolden each of *sentences* inside it.

    Every occurrence of each sentence is replaced by the same text wrapped in
    ``<strong>`` tags (plain string replacement, applied in order).
    """
    marked_up = '<p>%s</p>' % (content, )
    for sentence in sentences:
        if not sentence:
            # str.replace('') would insert the tags between every character.
            continue
        marked_up = marked_up.replace(sentence,
                                      '<strong>%s</strong>' % (sentence, ))
    return marked_up


def render_summary_html(title, marked_up):
    """Fill HTML_TEMPLATE with the page title and the marked-up body."""
    return HTML_TEMPLATE % (title + ' Summary', marked_up)


def write_summary(path, html):
    """Write *html* to *path* encoded as UTF-8."""
    # Binary mode: the payload is explicitly encoded bytes, which works the
    # same way on Python 2 and Python 3, and the file is always closed.
    with open(path, 'wb') as out:
        out.write(html.encode('utf-8'))


def main():
    """Summarize every post in BLOG_DATA and write one HTML page per summary.

    For each post and each entry of SUMMARY_TYPES the full post content is
    stored back on the post under ``<summary_type>_marked_up`` with the
    summary sentences emboldened, and written to OUTPUT_DIR. The last page
    written is then shown in an inline frame.
    """
    with open(BLOG_DATA, 'rb') as feed:
        blog_data = json.loads(feed.read().decode('utf-8'))

    last_path = None
    for post in blog_data:
        # summarize() is expected to return a mapping holding one list of
        # sentences per SUMMARY_TYPES key; it is merged into the post.
        post.update(summarize(post['content']))

        for summary_type in SUMMARY_TYPES:
            marked_up = mark_up_sentences(post['content'], post[summary_type])
            post[summary_type + '_marked_up'] = marked_up

            filename = (safe_filename(post['title']) +
                        '.summary.' + summary_type + '.html')
            last_path = os.path.join(OUTPUT_DIR, filename)
            write_summary(last_path,
                          render_summary_html(post['title'], marked_up))
            print("Data written to %s" % last_path)

    if last_path is None:
        # Empty feed: nothing was written, so there is nothing to display.
        return

    # Display the last file processed with an inline frame.
    # NOTE(review): last_path is absolute, so 'files/<path>' is unlikely to
    # resolve under the notebook's files/ handler - confirm the intended URL.
    print("Displaying %s:" % last_path)
    display(IFrame('files/%s' % last_path, '100%', '600px'))


if __name__ == '__main__':
    main()
# Sample console output from a previous run:
#
# Data written to E:\eclipse\Web\dfile\Four short links: 23 January 2015.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 23 January 2015.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Designing on a system level.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Designing on a system level.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Bitcoin is just the first app to use blockchain technology.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Bitcoin is just the first app to use blockchain technology.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Blockchain scalability.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Blockchain scalability.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Bringing an end to synthetic biology’s semantic debate.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Bringing an end to synthetic biology’s semantic debate.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Building and deploying large-scale machine learning pipelines.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Building and deploying large-scale machine learning pipelines.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 22 January 2015.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 22 January 2015.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\How to make a UX designer.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\How to make a UX designer.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\The 3Ps of the blockchain: platforms, programs and protocols.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\The 3Ps of the blockchain: platforms, programs and protocols.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 21 January 2015.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 21 January 2015.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\The Internet of Things is really about software.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\The Internet of Things is really about software.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\What containers can do for you.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\What containers can do for you.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 20 January 2015.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 20 January 2015.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Striking parallels between mathematics and software engineering.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Striking parallels between mathematics and software engineering.summary.mean_scored_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 19 January 2015.summary.top_n_summary.html
# Data written to E:\eclipse\Web\dfile\Four short links: 19 January 2015.summary.mean_scored_summary.html
# Displaying E:\eclipse\Web\dfile\Four short links: 19 January 2015.summary.mean_scored_summary.html