python爬取jsp网页_Python 爬取 热词并进行分类数据分析-[JSP演示+页面跳转]

日期:2020.02.03

博客期:142

星期一

【本博客的代码如若要使用,请在下方评论区留言,之后再用(就是跟我说一声)】

所有相关跳转:

a.【简单准备】

c.【拓扑数据】

d.【数据修复】

f.【JSP演示+页面跳转】(本期博客)

i.【App制作】

j.【安全性改造】

今天开始准备把昨天的任务补一下,再补充一下以前的博客。

1、全部热词演示页面改写

我追加了其他页的展示,以及页面跳转部分的 JS 代码。

附加新增代码:

com.servlet 包:

1 packagecom.servlet;2

3 importjava.io.IOException;4 importjava.sql.SQLException;5 importjava.util.List;6

7 importjavax.servlet.ServletException;8 importjavax.servlet.ServletOutputStream;9 importjavax.servlet.annotation.WebServlet;10 importjavax.servlet.http.HttpServlet;11 importjavax.servlet.http.HttpServletRequest;12 importjavax.servlet.http.HttpServletResponse;13

14 importorg.json.JSONArray;15 importorg.json.JSONObject;16

17 importcom.dblink.basic.utils.SqlUtils;18 importcom.dblink.basic.utils.sqlKind.MySql_s;19 importcom.dblink.basic.utils.user.UserInfo;20 importcom.dblink.bean.BeanGroup;21 importcom.dblink.sql.DBLink;22

23 @SuppressWarnings("unused")24 public class ServletForLinkData extendsHttpServlet{25 /**

26 *27 */

28 private static final long serialVersionUID = 1L;29 //----------------------------------------------------------------------//30 public void doPost(HttpServletRequest request,HttpServletResponse response) throwsServletException, IOException31 {32 request.setCharacterEncoding("utf-8");33 response.setCharacterEncoding("utf-8");34 response.setContentType("application/json");35 response.setHeader("Cache-Control", "no-cache");36 response.setHeader("Pragma", "no-cache");37

38 String word = request.getParameter("word");39

40 JSONArray jsonArray = newJSONArray();41

42 JSONObject jsonObj = newJSONObject();43

44

45 DBLink dbLink = new DBLink(new SqlUtils(new MySql_s("rc"),new UserInfo("root","123456")));46 BeanGroup bg = null;47 try{48 bg = dbLink.getSelect("Select * From words where word = '"+word+"'").beans;49

50 int leng =bg.size();51

52 jsonObj.put("Length",leng);53

54 jsonArray.put(jsonObj);55

56 for(int i=0;i

67 }68 dbLink.free();69

70 ServletOutputStream os =response.getOutputStream();71 os.write(jsonArray.toString().getBytes());72 os.flush();73 os.close();74 }75 //---------------------------------------------------------------------------------//76 }

ServletForLinkData.java

1 packagecom.servlet;2

3 importjava.io.IOException;4 importjava.sql.SQLException;5 importjava.util.List;6

7 importjavax.servlet.ServletException;8 importjavax.servlet.ServletOutputStream;9 importjavax.servlet.annotation.WebServlet;10 importjavax.servlet.http.HttpServlet;11 importjavax.servlet.http.HttpServletRequest;12 importjavax.servlet.http.HttpServletResponse;13

14 importorg.json.JSONArray;15 importorg.json.JSONObject;16

17 importcom.dblink.basic.utils.SqlUtils;18 importcom.dblink.basic.utils.sqlKind.MySql_s;19 importcom.dblink.basic.utils.user.UserInfo;20 importcom.dblink.bean.BeanGroup;21 importcom.dblink.sql.DBLink;22

23 @SuppressWarnings("unused")24 public class ServletForAllKeyWords extendsHttpServlet{25 /**

26 *27 */

28 private static final long serialVersionUID = 1L;29 //----------------------------------------------------------------------//30 public void doPost(HttpServletRequest request,HttpServletResponse response) throwsServletException, IOException31 {32 request.setCharacterEncoding("utf-8");33 response.setCharacterEncoding("utf-8");34 response.setContentType("application/json");35 response.setHeader("Cache-Control", "no-cache");36 response.setHeader("Pragma", "no-cache");37

38 String sql_rest = request.getParameter("sql");39

40 JSONArray jsonArray = newJSONArray();41

42 JSONObject jsonObj = newJSONObject();43

44

45 DBLink dbLink = new DBLink(new SqlUtils(new MySql_s("rc"),new UserInfo("root","123456")));46 BeanGroup bg = null;47 try{48 bg = dbLink.getSelect("Select * From keywords "+sql_rest).beans;49

50 int leng =bg.size();51

52 int maxSize = dbLink.getSelect("Select * From keywords ").beans.size();53

54 int page = maxSize%leng==0?(maxSize/30):(maxSize/30)+1;55

56 jsonObj.put("Length",leng);57 jsonObj.put("MaxSize",maxSize);58 jsonObj.put("Page",page);59

60 jsonArray.put(jsonObj);61

62 for(int i=0;i

72 }73 dbLink.free();74

75 ServletOutputStream os =response.getOutputStream();76 os.write(jsonArray.toString().getBytes());77 os.flush();78 os.close();79 }80 //---------------------------------------------------------------------------------//81 }

ServletForAllKeyWords.java

web.xml 文件更改:

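1 <?xml version="1.0" encoding="UTF-8"?>

2 <web-app>

3 <display-name>HotWord</display-name>

4 <servlet>

5 <description>This is the description of my J2EE component</description>

6 <display-name>This is the display name of my J2EE component</display-name>

7 <servlet-name>ServletForWords</servlet-name>

8 <servlet-class>com.servlet.ServletForWords</servlet-class>

9 </servlet>

10 <servlet-mapping>

11 <servlet-name>ServletForWords</servlet-name>

12 <url-pattern>/com/servlet/ServletForWords</url-pattern>

13 </servlet-mapping>

14 <servlet>

15 <description>This is the description of my J2EE component</description>

16 <display-name>This is the display name of my J2EE component</display-name>

17 <servlet-name>ServletForAllKeyWords</servlet-name>

18 <servlet-class>com.servlet.ServletForAllKeyWords</servlet-class>

19 </servlet>

20 <servlet-mapping>

21 <servlet-name>ServletForAllKeyWords</servlet-name>

22 <url-pattern>/com/servlet/ServletForAllKeyWords</url-pattern>

23 </servlet-mapping>

24 <servlet>

25 <description>This is the description of my J2EE component</description>

26 <display-name>This is the display name of my J2EE component</display-name>

27 <servlet-name>ServletForLinkData</servlet-name>

28 <servlet-class>com.servlet.ServletForLinkData</servlet-class>

29 </servlet>

30 <servlet-mapping>

31 <servlet-name>ServletForLinkData</servlet-name>

32 <url-pattern>/com/servlet/ServletForLinkData</url-pattern>

33 </servlet-mapping>

34 <welcome-file-list>

35 <welcome-file>index.html</welcome-file>

36 <welcome-file>index.htm</welcome-file>

37 <welcome-file>index.jsp</welcome-file>

38 <welcome-file>default.html</welcome-file>

39 <welcome-file>default.htm</welcome-file>

40 <welcome-file>default.jsp</welcome-file>

41 </welcome-file-list>

42 </web-app>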

web.xml

jsFiles/word.js 文件

1 var wordPage = 1;2 functionmakePageToWord()3 {4 var Area = '';5 Area += '

';6 Area += '
';7 Area += '

全部热词

';8 Area += '
';9 Area += '
';10 Area += '
';11 Area += '
';12 Area += '
';13 Area += '
';14 Area += ' 热词表';15 Area += '
';16 Area += ' ';17 Area += ' 按照词频顺序';18 Area += ' 按照字母表顺序';19 Area += ' ';20 Area += '        ';21 Area += ' ';22 Area += ' 降序';23 Area += ' 增序';24 Area += ' ';25 Area += '  ';26 Area += '
';27 Area += '
';28 Area += '
';29 Area += '
';30 Area += '
';31 Area += '
';32 Area += '
';33 document.getElementById("page-inner").innerHTML =Area;34 simpleReset();35 }36 functionsimpleReset()37 {38 wordPage = 1;39 resetAndFresh();40 }41 functionXReset(p)42 {43 wordPage =p;44 wordPage = parseInt(""+wordPage);45 resetAndFresh();46 }47 functionresetAndFresh()48 {49 var sty = document.getElementById("sty").value;50 var order = document.getElementById("order").value;51 var xmlHttp = null;52 try{53 xmlHttp = newXMLHttpRequest();54 } catch(e1) {55 try{56 xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");57 } catch(e2) {58 alert("Your browser does not support XMLHTTP!");59 return;60 }61 }62 xmlHttp.onreadystatechange = function() {63 if (xmlHttp.readyState == 4) {64 if (xmlHttp.status == 200)65 {66 var Area = "";67

68 s =xmlHttp.responseText;69 var InformationSet = eval('('+s+')');70 var leng = InformationSet[0].Length;71 var max = InformationSet[0].MaxSize;72 var pageNum = InformationSet[0].Page;73

74 Area += "

热词词频详细信息链接
";86 Area +=InformationSet[i].word;87 Area += " ";89 Area +=InformationSet[i].num;90 Area += " ";92 Area += " 详细信息";93 Area += "

98 {99 for (var i=1;i<=10;++i)100 {101 Area += "

";102 Area += " ";103 Area +=InformationSet[i].word;104 Area += " ";105 Area += " ";106 Area +=InformationSet[i].num;107 Area += " ";108 Area += " ";109 Area += " 详细信息";110 Area += " ";111 Area += "";112 }113 }114 Area += "";115

116

117 if(leng>10)118 {119 Area += "

热词词频详细信息链接
";131 Area +=InformationSet[i].word;132 Area += " ";134 Area +=InformationSet[i].num;135 Area += " ";137 Area += " 详细信息";138 Area += "

143 {144 for (var i=11;i<=20;++i)145 {146 Area += "

";147 Area += " ";148 Area +=InformationSet[i].word;149 Area += " ";150 Area += " ";151 Area +=InformationSet[i].num;152 Area += " ";153 Area += " ";154 Area += " 详细信息";155 Area += " ";156 Area += "";157 }158 }159 Area += "";160 }161

162 if(leng>20)163 {164 Area += "

热词词频详细信息链接
";174 Area +=InformationSet[i].word;175 Area += " ";177 Area +=InformationSet[i].num;178 Area += " ";180 Area += " 详细信息";181 Area += "
";185 }186 Area += "
";188 Area += "
";189 Area += "
";190 Area += "
";191 Area += "

";192 Area += " 起始页 ";193

194 var start = ((wordPage-4)>=1)?wordPage-4:1;195 var end = ((wordPage+4)<=pageNum)?(wordPage+4):pageNum;196

197 //alert(parseInt(wordPage+4+""));

198

199 if(start!=1)200 {201 Area += " ... ";202 }203

204 for(var i=start;i<=end;++i)205 {206 Area += " "+i+" ";207 }208

209 if(end!=pageNum)210 {211 Area += " ... ";212 }213

214 Area += " 结束页 ";215 Area += "  选择页数跳转  ";216 Area += "";217 for(var i=1;i<=pageNum;++i)218 {219 Area += ""+i+"";220 }221 Area += "";222 Area += "

";223 document.getElementById("MessageArea").innerHTML =Area;224 surePage();225 }226 }227 };228 var url ="../com/servlet/ServletForAllKeyWords";229 var server = "sql=";230 //按照词频顺序

231 if(sty==0)232 {233 server += " order by num ";234 }235 //按照字母表顺序

236 else if(sty==1)237 {238 server += " order by word ";239 }240

241 //如果是降序

242 if(order==0)243 {244 server += " DESC ";245 }246

247 server += (" Limit "+((wordPage-1)*30)+",30 ");248

249 xmlHttp.open("POST", url, true);250 xmlHttp.setRequestHeader("Content-Type","application/x-www-form-urlencoded");251 xmlHttp.send(server);252 }253 functiontoSomeWhere(word)254 {255 var Area = '';256 Area += '

';257 Area += '
';258 Area += '

'+word+'

';259 Area += '
';260 Area += '
';261 Area += '
';262 Area += '
';263 Area += '
';264 Area += '
';265 document.getElementById("page-inner").innerHTML =Area;266

267 var xmlHttp = null;268 try{269 xmlHttp = newXMLHttpRequest();270 } catch(e1) {271 try{272 xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");273 } catch(e2) {274 alert("Your browser does not support XMLHTTP!");275 return;276 }277 }278 xmlHttp.onreadystatechange = function() {279 if (xmlHttp.readyState == 4) {280 if (xmlHttp.status == 200)281 {282 var Area = "";283

284 s =xmlHttp.responseText;285 var InformationSet = eval('('+s+')');286 var word = InformationSet[1].word;287 var num = InformationSet[1].num;288 var exp = InformationSet[1].exp;289

290 Area += "

"+word+"

";291 Area += "

引用次数:"+num+"

"

292 Area += "

  ";293 if(exp=="")294 {295 Area += "目前百度百科上并没有相关解释信息...";296 }297 else

298 {299 Area +=exp;300 }301 Area += "

";302 Area += "
";303 Area += "

304 document.getElementById("MessageArea").innerHTML =Area;305

306 getLinksForKey(word);307 }308 }309 };310 var url ="../com/servlet/ServletForAllKeyWords";311 var server = "sql= where word='"+word+"'";312

313 xmlHttp.open("POST", url, true);314 xmlHttp.setRequestHeader("Content-Type","application/x-www-form-urlencoded");315 xmlHttp.send(server);316 }317 functiongetLinksForKey(word)318 {319 var xmlHttp = null;320 try{321 xmlHttp = newXMLHttpRequest();322 } catch(e1) {323 try{324 xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");325 } catch(e2) {326 alert("Your browser does not support XMLHTTP!");327 return;328 }329 }330 xmlHttp.onreadystatechange = function() {331 if (xmlHttp.readyState == 4) {332 if (xmlHttp.status == 200)333 {334 var Area = "";335 Area += "
";336 Area += "
";337 Area += "引用网页:";338 Area += "
";339 Area += "
";340 s =xmlHttp.responseText;341 var InformationSet = eval('('+s+')');342 var leng = InformationSet[0].Length;343

344 for(var i=1;i<=leng;++i)345 {346 var word =InformationSet[i].word;347 var num =InformationSet[i].num;348 var title =InformationSet[i].title;349 var link =InformationSet[i].link;350 Area += "

";351 Area += ""+title+""

352 Area += "

";353 }354

355 document.getElementById("finalDIV").innerHTML =Area;356 }357 }358 };359 var url ="../com/servlet/ServletForLinkData";360 var server = "word="+word;361

362 xmlHttp.open("POST", url, true);363 xmlHttp.setRequestHeader("Content-Type","application/x-www-form-urlencoded");364 xmlHttp.send(server);365 }366 functionsurePage()367 {368 document.getElementById("selPage").selectedIndex = wordPage-1;369 }370 functionmakeSurePage()371 {372 wordPage = document.getElementById("selPage").value;373 wordPage = parseInt(""+wordPage);374 resetAndFresh();375 }

word.js

这个页面跳转功能是我自己写的,慢慢调 Bug 调了很久,直到所有页面的跳转都能实现了,我才开始写这篇博客。

2、热词展示页面改写

之后我将热词展示页面稍稍改写了一下,比昨天的好看一些。呃~其实就是加了一点 CSS 修饰,无关紧要。

对应修改的 word.js 文件里的 function :

// Requests the records stored for one hot word and renders them into the
// element with id finalDIV.
//
// Parameter word: the hot word; it is appended to the POST body after word=.
//
// Flow, as visible in this listing:
//   1. Create an XMLHttpRequest, falling back to the Microsoft.XMLHTTP ActiveX
//      object; if both constructions throw, alert the user and return.
//   2. POST asynchronously to ../com/servlet/ServletForLinkData.
//   3. When the request completes with status 200, evaluate the response text,
//      read Length from entry 0, walk entries 1 through Length, and assign the
//      accumulated Area string to finalDIV through innerHTML.
//
// NOTE(review): the string literals below span line breaks and look as if their
// HTML markup was stripped when this listing was published - TODO confirm
// against the real word.js before relying on the exact output.
function getLinksForKey(word)

{

var xmlHttp = null;

// Prefer the native XMLHttpRequest; the ActiveX object is only tried if that throws.
try{

xmlHttp = new XMLHttpRequest();

} catch (e1) {

try {

xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");

} catch (e2) {

alert("Your browser does not support XMLHTTP!");

return;

}

}

// The handler only acts once the request is complete (readyState 4) with HTTP status 200.
xmlHttp.onreadystatechange = function() {

if (xmlHttp.readyState == 4) {

if (xmlHttp.status == 200)

{

var Area = "";

Area += "
";

Area += "
";

Area += "引用网页:";

Area += "
";

Area += "
";

Area += "

  • ";

// NOTE(review): s is assigned without a declaration, so it becomes an implicit global.
s = xmlHttp.responseText;

// NOTE(review): eval executes the response text as code; JSON.parse would parse it without running it.
var InformationSet = eval('('+s+')');

var leng = InformationSet[0].Length;

// Entry 0 carries the Length field; the records themselves are read from index 1 through leng.
for(var i=1;i<=leng;++i)

{

// This var declares a callback-local word that shadows the outer parameter of the same name.
var word = InformationSet[i].word;

var num = InformationSet[i].num;

var title = InformationSet[i].title;

var link = InformationSet[i].link;

Area += "

";

// Only title appears in the concatenation as listed; word, num and link are read but not visibly used.
// NOTE(review): title reaches innerHTML unescaped.
Area += ""+title+""

Area += "

";

}

Area += "

";

document.getElementById("finalDIV").innerHTML = Area;

}

}

};

var url ="../com/servlet/ServletForLinkData";

// NOTE(review): word is concatenated into the form body without encodeURIComponent.
var server = "word="+word;

// Asynchronous POST (third argument true) with a form-urlencoded body.
xmlHttp.open("POST", url, true);

xmlHttp.setRequestHeader("Content-Type","application/x-www-form-urlencoded");

xmlHttp.send(server);

}

// Shows the detail view for one hot word.
//
// Parameter word: the hot word; it is concatenated into the initial page
// skeleton and into the POST body.
//
// Flow, as visible in this listing:
//   1. Build a skeleton string containing word and assign it to the element
//      with id page-inner through innerHTML.
//   2. Create an XMLHttpRequest, falling back to the Microsoft.XMLHTTP ActiveX
//      object; if both constructions throw, alert the user and return.
//   3. POST asynchronously to ../com/servlet/ServletForAllKeyWords. The body is
//      sql= followed by a where-clause on word built by string concatenation.
//   4. When the request completes with status 200, evaluate the response text
//      and read word, num and exp from entry 1. An empty exp selects a fixed
//      placeholder sentence, otherwise exp itself is appended. The result is
//      assigned to the element with id MessageArea, then getLinksForKey is
//      called with the word taken from the response.
//
// NOTE(review): the client sends a raw SQL fragment in the sql parameter, and
// word is neither URL-encoded nor escaped for SQL - verify how the servlet
// consumes it.
// NOTE(review): the string literals below span line breaks and look as if their
// HTML markup was stripped when this listing was published; the last Area
// literal before the MessageArea assignment has no closing quote as listed.
// TODO confirm against the real word.js.
function toSomeWhere(word)

{

var Area = '';

Area += '

';

Area += '

';

Area += '

'+word+'

';

Area += '

';

Area += '

';

Area += '


';

Area += '
';

Area += '

';

Area += '

';

// Replace the page-inner container with the skeleton built above.
document.getElementById("page-inner").innerHTML = Area;

var xmlHttp = null;

// Prefer the native XMLHttpRequest; the ActiveX object is only tried if that throws.
try{

xmlHttp = new XMLHttpRequest();

} catch (e1) {

try {

xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");

} catch (e2) {

alert("Your browser does not support XMLHTTP!");

return;

}

}

// The handler only acts once the request is complete (readyState 4) with HTTP status 200.
xmlHttp.onreadystatechange = function() {

if (xmlHttp.readyState == 4) {

if (xmlHttp.status == 200)

{

var Area = "";

// NOTE(review): s is assigned without a declaration, so it becomes an implicit global.
s = xmlHttp.responseText;

// NOTE(review): eval executes the response text as code; JSON.parse would parse it without running it.
var InformationSet = eval('('+s+')');

// Only the entry at index 1 is read; these vars shadow the outer word parameter inside the callback.
var word = InformationSet[1].word;

var num = InformationSet[1].num;

var exp = InformationSet[1].exp;

Area += "

"+word+"

";

Area += "

   引用次数:"+num+"

"

Area += "

      ";

// An empty exp string selects the fixed placeholder sentence instead of the explanation text.
if(exp=="")

{

Area += "目前百度百科上并没有相关解释信息...";

}

else

{

Area += exp;

}

Area += "

";

Area += "
";

Area += "

document.getElementById("MessageArea").innerHTML = Area;

getLinksForKey(word);

}

}

};

var url ="../com/servlet/ServletForAllKeyWords";

var server = "sql= where word='"+word+"'";

xmlHttp.open("POST", url, true);

xmlHttp.setRequestHeader("Content-Type","application/x-www-form-urlencoded");

xmlHttp.send(server);

}

页面还可以,说的过去...反正最终应该不会提交这个Java Web网页。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值