bootstrap+jsoup+jsp智联页面抓取系统

bootstrap+jsoup+jsp智联页面抓取系统

1.文件列表


2.项目步骤:

(1) 准备所需jar包和文件

所需jar包:jsoup-1.8.1.jar
复制bootstrap下的dist目录到WebRoot目录下

(2) 需要编写的文件源码:
Down.java:
package com.tzxy.download;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads a web page and extracts job-listing rows from it with jsoup.
 *
 * <p>All methods are static; the class holds no per-instance state.
 */
public class Down {

	/** Logger used to report download failures instead of dumping stack traces to stderr. */
	private static final Logger LOG = Logger.getLogger(Down.class.getName());

	/** Charset used when the caller supplies no encoding. */
	private static final String DEFAULT_ENCODING = "UTF-8";

	/** Maximum time, in milliseconds, to wait for the connection to be established. */
	private static final int CONNECT_TIMEOUT_MS = 10000;

	/** Maximum time, in milliseconds, to wait for data once connected. */
	private static final int READ_TIMEOUT_MS = 30000;

	/**
	 * Downloads the resource at {@code url} and returns its content as text.
	 *
	 * <p>Every line read is followed by a single {@code '\n'}, whatever line terminator
	 * the server used. Failures are logged and never thrown: the method returns whatever
	 * was read before the failure, which is the empty string when nothing could be read.
	 *
	 * <p>NOTE(review): any protocol understood by {@link URL} is accepted. If {@code url}
	 * can come from an untrusted user, the caller should restrict it to the expected hosts.
	 *
	 * @param url      address of the page to download; {@code null} or blank yields {@code ""}
	 * @param encoding charset name used to decode the response; {@code null} or blank
	 *                 falls back to UTF-8
	 * @return the downloaded text, possibly partial or empty on failure; never {@code null}
	 */
	public static String getHtml(String url, String encoding) {
		// A null or blank URL can never be opened; skip the attempt and the log noise.
		if (url == null || url.trim().length() == 0) {
			return "";
		}
		String charset = (encoding == null || encoding.trim().length() == 0)
				? DEFAULT_ENCODING : encoding;

		StringBuilder html = new StringBuilder();
		InputStream in = null;
		BufferedReader reader = null;
		try {
			URLConnection connection = new URL(url).openConnection();
			// Without timeouts a stalled server would block the calling thread forever.
			connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
			connection.setReadTimeout(READ_TIMEOUT_MS);

			// Keep a handle on the raw stream: if the charset is rejected below, the
			// reader is never created and the stream must still be closed.
			in = connection.getInputStream();
			reader = new BufferedReader(new InputStreamReader(in, charset));

			String line;
			while ((line = reader.readLine()) != null) {
				html.append(line).append('\n');
			}
		} catch (MalformedURLException e) {
			LOG.log(Level.WARNING, "Invalid URL: " + url, e);
		} catch (IOException e) {
			LOG.log(Level.WARNING, "Failed to download " + url, e);
		} finally {
			try {
				if (reader != null) {
					reader.close(); // also closes the wrapped input stream
				} else if (in != null) {
					in.close();
				}
			} catch (IOException e) {
				LOG.log(Level.FINE, "Failed to close the stream for " + url, e);
			}
		}
		return html.toString();
	}

	/**
	 * Downloads the page at {@code url} and extracts one map per element carrying the
	 * CSS class {@code newlist}.
	 *
	 * <p>Each map holds the text of the descendants with the following classes
	 * (the meanings are presumed from the key names and class abbreviations;
	 * verify against the target page):
	 * <ul>
	 *   <li>{@code textTitle} - class {@code gsmc} (presumably the company name)</li>
	 *   <li>{@code jobName} - class {@code zwmc} (presumably the position name)</li>
	 *   <li>{@code address} - class {@code gzdd} (presumably the work location)</li>
	 *   <li>{@code money} - class {@code zwyx} (presumably the salary)</li>
	 *   <li>{@code date} - class {@code gxsj} (presumably the update time)</li>
	 * </ul>
	 * A missing descendant yields an empty string for its key.
	 *
	 * @param url      address of the listing page
	 * @param encoding charset name used to decode the page
	 * @return the extracted rows in document order; empty when the page could not be
	 *         downloaded or contains no {@code newlist} element; never {@code null}
	 */
	public static List<HashMap<String, String>> getJobInfo(String url, String encoding) {
		Document page = Jsoup.parse(getHtml(url, encoding));
		// Rows are looked up by CSS class across the whole document.
		Elements rows = page.getElementsByClass("newlist");

		List<HashMap<String, String>> jobs = new ArrayList<HashMap<String, String>>(rows.size());
		for (Element row : rows) {
			HashMap<String, String> job = new HashMap<String, String>();
			job.put("textTitle", textOf(row, "gsmc"));
			job.put("jobName", textOf(row, "zwmc"));
			job.put("address", textOf(row, "gzdd"));
			job.put("money", textOf(row, "zwyx"));
			job.put("date", textOf(row, "gxsj"));
			jobs.add(job);
		}
		return jobs;
	}

	/** Returns the combined text of the descendants of {@code row} with class {@code cssClass}. */
	private static String textOf(Element row, String cssClass) {
		return row.getElementsByClass(cssClass).text();
	}

	/**
	 * Entry point kept for compatibility; it intentionally does nothing.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		// Intentionally empty.
	}
}

index.jsp:
<%@ page language="java" import="java.util.*,com.tzxy.download.*" pageEncoding="UTF-8"%>
<%@taglib prefix="c"  uri="http://java.sun.com/jstl/core_rt"%>
<%-- Search page: reads the "url" request parameter, scrapes that page with
     Down.getJobInfo and renders the extracted rows in a Bootstrap table. --%>
<%
// Absolute base URL of this web application, used by the <base> tag below.
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <base href="<%=basePath%>">
    
    <title>My JSP 'index.jsp' starting page</title>
	<meta http-equiv="pragma" content="no-cache">
	<meta http-equiv="cache-control" content="no-cache">
	<meta http-equiv="expires" content="0">    
	<meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
	<meta http-equiv="description" content="This is my page">
	<!--
	<link rel="stylesheet" type="text/css" href="styles.css">
	-->
	
  <link rel="stylesheet" href="dist/css/bootstrap.min.css" type="text/css">
  <style type="text/css">
  .search{
  width:100%;
  height:200px;
  border:1px red solid;
  }
  .h{
  color: red;
  text-align: center;
  }
  .con{
  width:700px;
  height:50px;
  margin: auto;
  }
  .con .text{
  height:35px;
  width: 400px;
  padding-left: 10px;
  }
  .con .sub{
  width: 50px;
  height:35px;
  }
  .lab{
  color:red;
  }
   .con .text:focus {
   box-shadow:1px 1px 2px green;
   animation:shadow 3s linear infinite;
   }
   @keyframes shadow{
   from{ box-shadow:1px 1px 2px green,-1px -1px 2px green;}
   50%{box-shadow:0px 0px 0px green,-0px -0px 0px green;}
   to{box-shadow:1px 1px 2px green,-1px -1px 2px green;}
   }
  </style>
  </head>
  <body>
  <%
  // The parameter is null on the first visit and "" when the form is submitted empty.
  String url=request.getParameter("url");
  // Compare by content: the former reference comparison (url != "") was true for both
  // of those cases, so the prompt below was never shown and a null URL was fetched.
  if(url!=null && url.trim().length()>0){
  // NOTE(review): the URL is user-supplied and fetched server-side; consider
  // restricting it to the expected hosts before deploying this publicly.
  List<HashMap<String,String>> list=Down.getJobInfo(url,"utf-8");
  session.setAttribute("jobList",list);
  }
  else{
  session.setAttribute("msg","请输入网址");
  }
  %>
  <div class="search">
  <form action="index.jsp">
    <P align="center" style="font-size: 30px">智联网站抓取系统</P>
                
   <div class="con">请输入网址:<input name="url" type="text"  class="text"/>
   <input type="submit" class="sub"/>
   <label class="lab">${msg}</label>
   </div>
 </form>
  </div>
  <h2 class="h">搜索信息如下:</h2>
  <table class="table table-striped table-bordered table-hover">
  <%-- The cell values come from a remote page, so they are written through c:out,
       which escapes HTML special characters. --%>
  <c:forEach items="${jobList}" var="a">
   <tr>
  <td><c:out value="${a.textTitle}"/></td>
  <td><c:out value="${a.jobName}"/></td>
  <td><c:out value="${a.money}"/></td>
  <td><c:out value="${a.address}"/></td>
  <td><c:out value="${a.date}"/></td>
   </tr>
  </c:forEach>
  <%-- The message is one-shot: drop it so it does not reappear on the next request. --%>
  <c:remove var="msg" scope="session"/>
  </table>
  </body>
</html>





评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值