爬取招聘信息放入文件中
1.JobToFileServlet.java
package com.ld.jsoup.servlet;
import com.ld.jsoup.beans.Job;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Servlet that crawls Java job postings from 51job search result pages and appends the
 * description text of every posting to a local file.
 *
 * <p>A GET request downloads the search result pages on the request thread, hands each page
 * to a worker pool that fetches the linked postings, and then answers {@code "success"}.
 * The response is written as soon as the work has been queued; the worker tasks may still
 * be running at that point.
 */
public class JobToFileServlet extends HttpServlet {

    /** Number of search result pages fetched per request (pages 1..PAGE_COUNT). */
    private static final int PAGE_COUNT = 2;

    /** Size of the per-request worker pool that downloads the individual postings. */
    private static final int WORKER_THREADS = 3;

    /** Part of the search URL that precedes the page number. */
    private static final String SEARCH_URL_PREFIX =
            "https://search.51job.com/list/010000,000000,0000,00,9,99,java,2,";

    /** Part of the search URL that follows the page number. */
    private static final String SEARCH_URL_SUFFIX =
            ".html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99"
            + "&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9"
            + "&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=";

    /** File that receives one line per crawled job description. */
    private static final File OUTPUT_FILE = new File("E:" + File.separator + "job.txt");

    /** Serializes appends so that lines written by different workers cannot interleave. */
    private static final Object FILE_LOCK = new Object();

    /**
     * Fetches the search result pages, queues the crawl of their postings and replies
     * {@code "success"}.
     *
     * @param request  the incoming HTTP request
     * @param response the HTTP response; receives the plain marker text {@code "success"}
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if the response writer cannot be obtained
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        request.setCharacterEncoding("UTF-8");
        response.setContentType("text/html;charset=UTF-8");
        PrintWriter out = response.getWriter();
        ExecutorService executorService = Executors.newFixedThreadPool(WORKER_THREADS);
        try {
            for (int page = 1; page <= PAGE_COUNT; page++) {
                String url = SEARCH_URL_PREFIX + page + SEARCH_URL_SUFFIX;
                try {
                    // connect(...).get() downloads and parses the search result page.
                    final Document document = Jsoup.connect(url).get();
                    executorService.execute(new Runnable() {
                        @Override
                        public void run() {
                            saveJobDescriptions(document);
                        }
                    });
                } catch (IOException e) {
                    // One unreachable page must not prevent the remaining pages from being crawled.
                    log("Failed to fetch search page " + url, e);
                }
            }
        } finally {
            // Already-queued tasks still run to completion; the pool threads are released afterwards
            // instead of accumulating with every request.
            executorService.shutdown();
        }
        // The client-side handler compares the body against exactly "success".
        out.print("success");
    }

    /**
     * Follows every posting link on a search result page and stores the posting's description.
     * A posting that cannot be downloaded or written is logged and skipped.
     *
     * @param searchPage parsed search result page
     */
    private void saveJobDescriptions(Document searchPage) {
        // Select the posting links of the result list.
        Elements links = searchPage.select("p.t1 span a");
        for (Element link : links) {
            String jobUrl = link.absUrl("href");
            if (jobUrl.isEmpty()) {
                // absUrl yields "" when no absolute URL can be built; connecting to it would throw.
                continue;
            }
            try {
                Document jobPage = Jsoup.connect(jobUrl).get();
                String description = jobPage.select("div.bmsg.job_msg.inbox").text();
                appendLine(description);
            } catch (IOException e) {
                log("Skipping job posting " + jobUrl, e);
            }
        }
    }

    /**
     * Appends {@code text} followed by CRLF to the output file, encoded as UTF-8.
     *
     * @param text the line to append
     * @throws IOException if the file cannot be opened or written
     */
    private static void appendLine(String text) throws IOException {
        synchronized (FILE_LOCK) {
            // try-with-resources flushes and closes the writer even when a write fails.
            try (Writer writer = new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE, true), "UTF-8")) {
                writer.write(text);
                writer.write("\r\n");
            }
        }
    }

    /**
     * Delegates to the default {@link HttpServlet} POST handling; this servlet adds no POST
     * behavior of its own.
     */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        super.doPost(req, resp);
    }
}
2.indexAjax.js
/**
 * Sends a GET request to the /JobToFile endpoint and passes the text response to
 * ifsuccess; shows an alert when the request itself fails.
 */
function ajaxRequest5() {
    $.ajax({
        "url": "/JobToFile",
        "type": "GET",
        // jQuery's option is spelled "dataType"; a misspelled key is silently ignored.
        "dataType": "text", // expected type of the response body
        "success": ifsuccess, // callback invoked with the response body on success
        "error": function () {
            alert("请求错误!");
        }
    });
}
/**
 * Success callback for the crawl request: alerts the success message when the
 * response body equals "success", otherwise alerts the failure message.
 *
 * @param data response body returned by the server
 */
function ifsuccess(data) {
    var message = (data == "success") ? "爬取成功!" : "爬取失败!";
    alert(message);
}
3.web.xml
<!-- Registers the servlet class under the logical name JobToFileServlet. -->
<servlet>
<servlet-name>JobToFileServlet</servlet-name>
<servlet-class>com.ld.jsoup.servlet.JobToFileServlet</servlet-class>
</servlet>
<!-- Routes requests for /JobToFile (relative to the context root) to that servlet. -->
<servlet-mapping>
<servlet-name>JobToFileServlet</servlet-name>
<url-pattern>/JobToFile</url-pattern>
</servlet-mapping>