CVPR论文生成热点词汇云图
一、Python爬取数据
# Scrape the CVPR 2019 open-access index, fetch every paper's abstract page
# and store title / link / abstract / keywords in the MySQL table `lunwen`.
import requests
import pymysql
from bs4 import BeautifulSoup

# Keyword arguments only: newer PyMySQL releases no longer accept a
# positional host.
db = pymysql.connect(host='127.0.0.1',
                     port=3306,
                     user='root',
                     password='123',
                     db='mytest',
                     charset='utf8')

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"}
url = "http://openaccess.thecvf.com/CVPR2019.py"

try:
    # The browser-like User-Agent is sent with every request (it was
    # previously defined but never used).
    html = requests.get(url, headers=headers)
    soup = BeautifulSoup(html.content, 'html.parser')
    # Every paper on the index page has an <a> whose text is exactly "pdf".
    pdfs = soup.find_all(name="a", text="pdf")

    lis = []
    for i, pdf in enumerate(pdfs):
        # ".../Author_Some_Title_CVPR_2019_paper.pdf" -> "Author_Some_Title"
        pdf_name = pdf["href"].split('/')[-1]
        name = pdf_name.split('.')[0].replace("_CVPR_2019_paper", "")
        link = "http://openaccess.thecvf.com/content_CVPR_2019/html/" + name + "_CVPR_2019_paper.html"

        html1 = requests.get(link, headers=headers)
        soup1 = BeautifulSoup(html1.content, 'html.parser')
        weizhi = soup1.find('div', attrs={'id': 'abstract'})
        # Reset per paper so a page without an abstract does not inherit the
        # abstract of the previously processed paper.
        jianjie = weizhi.get_text() if weizhi else ""
        print("这是第" + str(i) + "条数据")

        # The underscore-separated parts of the file name become the
        # comma-separated keyword list.
        keywords = ','.join(str(name).split('_'))

        lis.append({
            'title': name,
            'link': link,
            'abstract': jianjie,
            'keywords': keywords,
        })

    cursor = db.cursor()
    for info in lis:
        # Column list and named placeholders are derived from the dict keys;
        # the values themselves are passed to execute() as parameters.
        cols = ", ".join('`{}`'.format(k) for k in info.keys())
        print(cols)  # e.g. '`name`, `age`'
        val_cols = ', '.join('%({})s'.format(k) for k in info.keys())
        print(val_cols)  # e.g. '%(name)s, %(age)s'
        sql = "insert into lunwen(%s) values(%s)"
        res_sql = sql % (cols, val_cols)
        print(res_sql)
        cursor.execute(res_sql, info)  # the dict supplies the named parameters
        db.commit()
        num = 1
        print(num)
        print("成功")
finally:
    # Always release the database connection, even if scraping fails midway.
    db.close()
二、分析、查找关键词
借助Map存储关键词:key为关键词,value为该关键词出现的次数。遍历时每遇到一次相同的关键词,就将其value值加1;全部统计完成后,再根据value值从大到小排序。
dao层:
package dao;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;
import Bean.copy.*;
import jdbc.Util;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils.Collections;
public class Dao {
public static Map getrc()
{
String sql="select * from lunwen";
Mapmap= new HashMap();
Mapresults= new LinkedHashMap();
Connection con=null;
Statement state=null;
ResultSet rs=null;
con=Util.getConn();
try {
state=con.createStatement();
rs=state.executeQuery(sql);
while(rs.next())
{
String keywords=rs.getString("keywords");
String[] split = keywords.split(",");
for(int i=0;i
{
if(map.get(split[i])==null)
{
map.put(split[i],0);
}
else
{
map.replace(split[i], map.get(split[i])+1);
}
}
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
Util.close(rs, state, con);
map.entrySet()
.stream()
.sorted((p1, p2) -> p2.getValue().compareTo(p1.getValue()))
.collect(Collectors.toList())
.forEach(ele -> results.put(ele.getKey(), ele.getValue()));
return results;
}
public List list(String keywords) { // 查询所有信息
List list = new ArrayList(); // 创建集合
Connection conn = Util.getConn();
String sql = "select * from lunwen where keywords like "+"'%"+keywords+"%'"; // SQL查询语句
try {
PreparedStatement pst = conn.prepareStatement(sql);
ResultSet rs = pst.executeQuery();
Data data = null;
while (rs.next()) {
String title = rs.getString("title");
String link = rs.getString("link");
String as= rs.getString("abstract");
data = new Data(title,link,as,keywords);
list.add(data);
}
rs.close(); // 关闭
pst.close(); // 关闭
} catch (SQLException e1) {
e1.printStackTrace(); // 抛出异常
}
return list; // 返回一个集合
}
}
servlet层:
package servlet;
import java.io.IOException;
import java.util.Map;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import dao.Dao;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
@WebServlet("/RcServlet")
public class RcServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
/**
* @see HttpServlet#HttpServlet()
*/
public RcServlet() {
super();
// TODO Auto-generated constructor stub
}
/**
* @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
*/
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
this.doPost(request, response);
}
/**
* @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
*/
protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
request.setCharacterEncoding("utf-8");
MapsortMap=Dao.getrc();
JSONArray json =new JSONArray();
int k=0;
for (Map.Entry entry : sortMap.entrySet())
{
JSONObject ob=new JSONObject();
ob.put("name", entry.getKey());
ob.put("value", entry.getValue());
if(!(entry.getKey().equals("for")||entry.getKey().equals("and")||entry.getKey().equals("With")||entry.getKey().equals("of")||entry.getKey().equals("in")||entry.getKey().equals("From")||entry.getKey().equals("A")||entry.getKey().equals("to")||entry.getKey().equals("a")||entry.getKey().equals("the")||entry.getKey().equals("by")))
{
json.add(ob);
k++;
}
if(k==10)
break;
}
System.out.println(json.toString());
response.getWriter().write(json.toString());
}
}
三、生成热点词汇云图
<% response.setCharacterEncoding("utf-8"); %>
热词云
#main{
width:30%;
height: 500px;
border:1px solid #ddd;float:right;
}
/* Left-hand panel: 70% wide, fixed 500px height, scrolls in both directions on overflow. */
#table {
    float: left;
    width: 70%;
    height: 500px;
    overflow: auto;
    /* NOTE(review): "dp" is not a CSS length unit, so browsers drop these two
       declarations; kept unchanged - confirm whether 100px was intended. */
    margin-top:100dp;
    padding-top:100dp;
}
热词云
论文连接 |
---|
${item.title} |
// Fetch the keyword counts from RcServlet and render them as an ECharts word
// cloud inside the element with id "main"; clicking a word opens ClickServlet
// for that keyword.
$.ajax({
    url: "RcServlet",
    async: true,
    type: "POST",
    data: {},
    dataType: "json",
    success: function (data) {
        // Copy only the two fields the series reads. A local variable is used
        // here instead of the implicit global the response was assigned to.
        var mydata = [];
        for (var i = 0; i < data.length; i++) {
            mydata.push({
                name: data[i].name,
                value: data[i].value
            });
        }

        var myChart = echarts.init(document.getElementById('main'));
        myChart.setOption({
            title: {
                text: ''
            },
            tooltip: {},
            series: [{
                type: 'wordCloud', // word-cloud series
                shape: 'smooth', // smooth outline
                gridSize: 8, // grid size
                size: ['50%', '50%'], // alternative: sizeRange: [50, 100]
                rotationRange: [-45, 0, 45, 90], // rotation range
                textStyle: {
                    normal: {
                        fontFamily: '微软雅黑',
                        // Random RGB colour for every word.
                        color: function () {
                            return 'rgb(' + Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) + ')';
                        }
                    },
                    emphasis: {
                        shadowBlur: 5, // shadow blur
                        shadowColor: '#333' // shadow colour
                    }
                },
                left: 'center',
                top: 'center',
                right: null,
                bottom: null,
                width: '100%',
                height: '100%',
                data: mydata
            }]
        });

        // Click handling: navigate to the paper list for the clicked word.
        myChart.on('click', function (params) {
            // Encode the keyword so characters such as '&', '#', '+' or spaces
            // cannot corrupt the query string.
            var url = "ClickServlet?keywords=" + encodeURIComponent(params.name);
            window.location.href = url;
        });
        alert("成功!");
    },
    error: function () {
        alert("请求失败");
    }
});