bootstrap+jsoup+jsp智联页面抓取系统
1.文件列表
2.项目步骤:
1.准备所需jar包和文件
所需jar包:jsoup-1.8.1.jar(index.jsp中使用了JSTL标签,若运行环境未自带JSTL,还需加入jstl相关jar包)
复制bootstrap下的dist文件夹到WebRoot下(页面通过dist/css/bootstrap.min.css引用样式)
2.需要编写的文件源码:
Down.java:
package com.tzxy.download;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads a job-listing page and extracts its rows with jsoup.
 *
 * <p>All members are static; the class holds no state.
 */
public class Down {

    /**
     * Downloads the resource at {@code url} and returns its content as text.
     *
     * <p>Lines are read one by one and re-joined with {@code '\n'}, so every
     * line of the result (including the last one) ends with a newline.
     *
     * <p>I/O failures (including a malformed URL or an unsupported encoding
     * name) are not propagated: the stack trace is printed and whatever was
     * read so far is returned, which is the empty string if nothing was read.
     *
     * <p>NOTE(review): the URL is opened as given, whatever its protocol;
     * callers passing user-supplied URLs should validate them first.
     *
     * @param url      address of the resource to download
     * @param encoding charset name used to decode the response bytes
     * @return the downloaded text, possibly empty; never {@code null}
     */
    public static String getHtml(String url, String encoding) {
        StringBuilder content = new StringBuilder();
        InputStream in = null;
        BufferedReader reader = null;
        try {
            URLConnection connection = new URL(url).openConnection();
            // Keep a handle on the raw stream so it can still be closed when
            // building the reader fails (e.g. unsupported encoding name).
            in = connection.getInputStream();
            reader = new BufferedReader(new InputStreamReader(in, encoding));
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so this covers both.
            // Best effort: report the problem and return what was read.
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    // Closing the reader also closes the wrapped stream.
                    reader.close();
                } else if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return content.toString();
    }

    /**
     * Downloads the page at {@code url} and extracts one map per element
     * carrying the CSS class {@code newlist}.
     *
     * <p>Each map holds the text of the nested elements with these classes
     * (the meanings are presumably the pinyin abbreviations used by the
     * scraped site - confirm against the live page):
     * <ul>
     *   <li>{@code textTitle} - class {@code gsmc}</li>
     *   <li>{@code jobName} - class {@code zwmc}</li>
     *   <li>{@code address} - class {@code gzdd}</li>
     *   <li>{@code money} - class {@code zwyx}</li>
     *   <li>{@code date} - class {@code gxsj}</li>
     * </ul>
     * A value is the empty string when the row has no such nested element.
     *
     * @param url      address of the listing page
     * @param encoding charset name used to decode the page
     * @return the extracted rows in document order; empty when the page
     *         could not be downloaded or contains no matching element
     */
    public static List<HashMap<String, String>> getJobInfo(String url, String encoding) {
        Document page = Jsoup.parse(getHtml(url, encoding));
        // Every listing row carries the class "newlist".
        Elements rows = page.getElementsByClass("newlist");
        List<HashMap<String, String>> jobs = new ArrayList<HashMap<String, String>>();
        for (Element row : rows) {
            HashMap<String, String> job = new HashMap<String, String>();
            job.put("textTitle", row.getElementsByClass("gsmc").text());
            job.put("jobName", row.getElementsByClass("zwmc").text());
            job.put("address", row.getElementsByClass("gzdd").text());
            job.put("money", row.getElementsByClass("zwyx").text());
            job.put("date", row.getElementsByClass("gxsj").text());
            jobs.add(job);
        }
        return jobs;
    }

    /**
     * Entry point kept from the original; it intentionally does nothing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
    }
}
index.jsp:
<%@ page language="java" import="java.util.*,com.tzxy.download.*" pageEncoding="UTF-8"%>
<%@taglib prefix="c" uri="http://java.sun.com/jstl/core_rt"%>
<%-- Build the application's absolute base URL so relative links (e.g. dist/css/...) resolve against the context root. --%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>">
<title>My JSP 'index.jsp' starting page</title>
<meta http-equiv="pragma" content="no-cache">
<meta http-equiv="cache-control" content="no-cache">
<meta http-equiv="expires" content="0">
<%-- keywords/description are document metadata, so they use name= rather than http-equiv=. --%>
<meta name="keywords" content="keyword1,keyword2,keyword3">
<meta name="description" content="This is my page">
<!--
<link rel="stylesheet" type="text/css" href="styles.css">
-->
<link rel="stylesheet" href="dist/css/bootstrap.min.css" type="text/css">
<%-- The style block belongs inside <head>; the head is closed after it. --%>
<style type="text/css">
.search{
  width:100%;
  height:200px;
  border:1px red solid;
}
.h{
  color: red;
  text-align: center;
}
.con{
  width:700px;
  height:50px;
  margin: auto;
}
.con .text{
  height:35px;
  width: 400px;
  padding-left: 10px;
}
.con .sub{
  width: 50px;
  height:35px;
}
.lab{
  color:red;
}
.con .text:FOCUS {
  box-shadow:1px 1px 2px green;
  animation:shadow 3s linear infinite;
}
@keyframes shadow{
  from{ box-shadow:1px 1px 2px green,-1px -1px 2px green;}
  50%{box-shadow:0px 0px 0px green,-0px -0px 0px green;}
  to{box-shadow:1px 1px 2px green,-1px -1px 2px green;}
}
</style>
</head>
<body>
<%
// Read the address typed into the form; it is null when the page is opened
// without submitting the form.
String url=request.getParameter("url");
// Strings must be compared by content: the previous reference check
// (url != "") was true for every request value, so the hint branch below
// was never reached and null/blank addresses were sent to the scraper.
if(url!=null && url.trim().length()>0){
    List<HashMap<String,String>> list=Down.getJobInfo(url.trim(),"utf-8");
    session.setAttribute("jobList",list);
}
else{
    // Nothing to fetch: drop any rows left from an earlier search so the
    // table stays empty, and ask the user for an address.
    session.removeAttribute("jobList");
    session.setAttribute("msg","请输入网址");
}
%>
<%-- Search form: submits the address back to this page as the "url" parameter. --%>
<div class="search">
<form action="index.jsp">
<P align="center" style="font-size: 30px">智联网站抓取系统</P>
<div class="con">请输入网址:<input name="url" type="text" class="text"/>
<input type="submit" class="sub"/>
<label class="lab"><c:out value="${msg}"/></label>
</div>
</form>
</div>
<h2 class="h">搜索信息如下:</h2>
<%-- One table row per scraped entry. The values come from a remote page, so they are
     written through c:out, which escapes XML special characters by default. --%>
<table class="table table-striped table-bordered table-hover">
<c:forEach items="${jobList}" var="a">
<tr>
<td><c:out value="${a.textTitle}"/></td>
<td><c:out value="${a.jobName}"/></td>
<td><c:out value="${a.money}"/></td>
<td><c:out value="${a.address}"/></td>
<td><c:out value="${a.date}"/></td>
</tr>
</c:forEach>
</table>
<%-- The hint is shown once; remove it so it does not reappear on the next request. --%>
<c:remove var="msg" scope="session"/>
</body>
</html>