python分析热点词汇_CVPR论文生成热点词汇云图

CVPR论文生成热点词汇云图

一、python爬取数据

import requests
import pymysql
from bs4 import BeautifulSoup

# Scrape the CVPR 2019 open-access listing, fetch every paper's abstract page,
# and store title / link / abstract / keywords in the MySQL table `lunwen`.
# Keywords are simply the underscore-separated tokens of the PDF file name.

# Seconds to wait for any single HTTP response, so one stalled request cannot
# hang the whole crawl.
REQUEST_TIMEOUT = 30

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"}

url = "http://openaccess.thecvf.com/CVPR2019.py"
html = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly if the index page is unavailable instead of inserting nothing.
html.raise_for_status()
soup = BeautifulSoup(html.content, 'html.parser')

# Every paper on the index page has an <a> whose text is exactly "pdf".
pdfs = soup.find_all(name="a", string="pdf")

lis = []
for i, pdf in enumerate(pdfs):
    pdf_name = pdf["href"].split('/')[-1]
    name = pdf_name.split('.')[0].replace("_CVPR_2019_paper", "")
    link = "http://openaccess.thecvf.com/content_CVPR_2019/html/" + name + "_CVPR_2019_paper.html"

    html1 = requests.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
    soup1 = BeautifulSoup(html1.content, 'html.parser')
    weizhi = soup1.find('div', attrs={'id': 'abstract'})
    # Reset per paper: a page without an abstract must not inherit the
    # previous paper's abstract.
    jianjie = weizhi.get_text() if weizhi else ""
    print("这是第" + str(i) + "条数据")

    # Comma-separated file-name tokens, e.g. "Xu_Deep_Learning" -> "Xu,Deep,Learning".
    keywords = ','.join(str(name).split('_'))

    lis.append({
        'title': name,
        'link': link,
        'abstract': jianjie,
        'keywords': keywords,
    })

# Connect only once the crawl is finished so the connection does not sit idle
# while the pages are being downloaded.
db = pymysql.connect(host='127.0.0.1',
                     port=3306,
                     user='root',
                     password='123',
                     database='mytest',
                     charset='utf8')
cursor = db.cursor()
try:
    for info in lis:
        # Column list and named placeholders are derived from the dict keys;
        # the values themselves are passed separately and escaped by the driver.
        cols = ", ".join('`{}`'.format(k) for k in info.keys())
        print(cols)  # '`name`, `age`'
        val_cols = ', '.join('%({})s'.format(k) for k in info.keys())
        print(val_cols)  # '%(name)s, %(age)s'
        sql = "insert into lunwen(%s) values(%s)"
        res_sql = sql % (cols, val_cols)
        print(res_sql)
        cursor.execute(res_sql, info)  # the dict supplies the named parameters
        db.commit()
        num = 1
        print(num)
finally:
    cursor.close()
    db.close()

print("成功")

二、分析、查找关键词

借助 Map 存储关键词:key 为关键词,value 为该关键词出现的次数。遍历每篇论文的关键词,首次出现时记为 1,之后每再出现一次就将对应的 value 加 1;最后按 value 从大到小排序,得到热点词汇的排名。

dao层:

package dao;

import java.sql.Connection;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

import java.sql.Statement;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.LinkedHashMap;

import java.util.Map;

import java.util.stream.Collectors;

import Bean.copy.*;

import jdbc.Util;

import java.sql.Connection;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

import java.sql.Statement;

import java.util.ArrayList;

import java.util.List;

import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils.Collections;

public class Dao {

public static Map getrc()

{

String sql="select * from lunwen";

Mapmap= new HashMap();

Mapresults= new LinkedHashMap();

Connection con=null;

Statement state=null;

ResultSet rs=null;

con=Util.getConn();

try {

state=con.createStatement();

rs=state.executeQuery(sql);

while(rs.next())

{

String keywords=rs.getString("keywords");

String[] split = keywords.split(",");

for(int i=0;i

{

if(map.get(split[i])==null)

{

map.put(split[i],0);

}

else

{

map.replace(split[i], map.get(split[i])+1);

}

}

}

} catch (SQLException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

Util.close(rs, state, con);

map.entrySet()

.stream()

.sorted((p1, p2) -> p2.getValue().compareTo(p1.getValue()))

.collect(Collectors.toList())

.forEach(ele -> results.put(ele.getKey(), ele.getValue()));

return results;

}

public List list(String keywords) { // 查询所有信息

List list = new ArrayList(); // 创建集合

Connection conn = Util.getConn();

String sql = "select * from lunwen where keywords like "+"'%"+keywords+"%'"; // SQL查询语句

try {

PreparedStatement pst = conn.prepareStatement(sql);

ResultSet rs = pst.executeQuery();

Data data = null;

while (rs.next()) {

String title = rs.getString("title");

String link = rs.getString("link");

String as= rs.getString("abstract");

data = new Data(title,link,as,keywords);

list.add(data);

}

rs.close(); // 关闭

pst.close(); // 关闭

} catch (SQLException e1) {

e1.printStackTrace(); // 抛出异常

}

return list; // 返回一个集合

}

}

servlet层:

package servlet;

import java.io.IOException;

import java.util.Map;

import javax.servlet.ServletException;

import javax.servlet.annotation.WebServlet;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import dao.Dao;

import net.sf.json.JSONArray;

import net.sf.json.JSONObject;

/**
 * Returns the most frequent paper keywords as a JSON array of
 * {"name": keyword, "value": count} objects for the word-cloud page.
 */
@WebServlet("/RcServlet")
public class RcServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    /** Maximum number of keywords sent to the client. */
    private static final int MAX_WORDS = 10;

    /**
     * Words filtered out of the result. Matching is case-sensitive, exactly
     * as listed here.
     */
    private static final java.util.Set<String> STOP_WORDS =
            java.util.Collections.unmodifiableSet(new java.util.HashSet<String>(
                    java.util.Arrays.asList("for", "and", "With", "of", "in",
                            "From", "A", "to", "a", "the", "by")));

    /**
     * @see HttpServlet#HttpServlet()
     */
    public RcServlet() {
        super();
    }

    /**
     * Delegates to {@link #doPost(HttpServletRequest, HttpServletResponse)}.
     *
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        this.doPost(request, response);
    }

    /**
     * Writes up to {@link #MAX_WORDS} keywords, most frequent first and with
     * stop words removed, to the response as JSON.
     *
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        request.setCharacterEncoding("utf-8");
        // Must be set before getWriter() is called, otherwise the container's
        // default encoding is used for the body.
        response.setCharacterEncoding("utf-8");
        response.setContentType("application/json");

        Map<String, Integer> sortMap = Dao.getrc();
        JSONArray json = new JSONArray();
        int k = 0;
        for (Map.Entry<String, Integer> entry : sortMap.entrySet()) {
            if (STOP_WORDS.contains(entry.getKey())) {
                continue;
            }
            JSONObject ob = new JSONObject();
            ob.put("name", entry.getKey());
            ob.put("value", entry.getValue());
            json.add(ob);
            k++;
            if (k == MAX_WORDS) {
                break;
            }
        }
        System.out.println(json.toString());
        response.getWriter().write(json.toString());
    }

}

三、生成热词汇云图

<% response.setCharacterEncoding("utf-8"); %>

热词云

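/* The selector was lost when the page was extracted; presumably #main, the
   element the chart script looks up by id - confirm against the original. */
#main{
width:30%;
height: 500px;
border:1px solid #ddd;float:right;
}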

/* Scrollable panel occupying the left 70% of the page. */
#table{
    overflow-x: auto;
    overflow-y: auto;
    width: 70%;
    height: 500px;
    float: left;
    /* "dp" is not a CSS length unit, so the declarations were ignored; px is used instead. */
    margin-top: 100px;
    padding-top: 100px;
}

热词云

论文链接
${item.title}

// Request the keyword counts from RcServlet and render them as a word cloud
// in the element with id "main". Clicking a word opens ClickServlet with that
// word as the search keyword.
$.ajax({
    url: "RcServlet",
    async: true,
    type: "POST",
    data: {},
    dataType: "json",
    success: function (data) {
        // Keep only the two fields the chart series reads.
        var mydata = data.map(function (item) {
            return { name: item.name, value: item.value };
        });

        var myChart = echarts.init(document.getElementById('main'));
        myChart.setOption({
            title: {
                text: ''
            },
            tooltip: {},
            series: [{
                type: 'wordCloud',
                shape: 'smooth',
                gridSize: 8,
                size: ['50%', '50%'],
                // sizeRange: [50, 100],
                rotationRange: [-45, 0, 45, 90],
                textStyle: {
                    normal: {
                        fontFamily: '微软雅黑',
                        // Every word gets an independent random RGB colour.
                        color: function () {
                            return 'rgb(' + Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) + ')';
                        }
                    },
                    emphasis: {
                        shadowBlur: 5,
                        shadowColor: '#333'
                    }
                },
                left: 'center',
                top: 'center',
                right: null,
                bottom: null,
                width: '100%',
                height: '100%',
                data: mydata
            }]
        });

        // Navigate to the paper list for the clicked word.
        myChart.on('click', function (params) {
            // Encode the word so characters such as '&', '#', '+' or non-ASCII
            // text cannot corrupt the query string.
            var url = "ClickServlet?keywords=" + encodeURIComponent(params.name);
            window.location.href = url;
        });

        alert("成功!");
    },
    error: function () {
        alert("请求失败");
    }
});

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值