import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CreationHelper; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.junit.Test; public class Jll2 { /** * *<p>Title:智联招聘网站爬虫小案例</p> *<p>Description:</p> *<p>Company:</p> *@author wty * @throws IOException *@date 2017年7月8日上午11:28:01 * */ public static String getHtmlCode(String url, String encoding) throws IOException { URL uri =null; URLConnection urlConnection =null; InputStream inputStream =null; InputStreamReader inputStreamReader = null; BufferedReader bReader =null; StringBuffer sBuffer= new StringBuffer(); try { // 建立网络连接 uri = new URL(url); // 打开连接 urlConnection = uri.openConnection(); //输入流 inputStream = urlConnection.getInputStream(); inputStreamReader = new InputStreamReader(inputStream, encoding); bReader = new BufferedReader(inputStreamReader); String temp; while ((temp = bReader.readLine()) != null) { sBuffer.append(temp + "\n"); } } catch (Exception e) { e.printStackTrace(); }finally { //关闭资源 if(bReader!=null){ try { bReader.close(); } catch (IOException e) { e.printStackTrace(); } } } return sBuffer.toString(); } public static List<HashMap<String, String>> analyzeHtml(String url, String encoding) throws IOException{ String htmlCode = getHtmlCode(url, encoding); Document document = Jsoup.parse(htmlCode); Elements elements = document.getElementsByClass("newlist"); List<HashMap<String, String>> list=new ArrayList<>(); for (Element e : elements) { HashMap<String, String> map = new HashMap<>(); String zwmc = e.getElementsByClass("zwmc").text(); String gsmc = e.getElementsByClass("gsmc").text(); String zwyx = e.getElementsByClass("zwyx").text(); String gzdd = e.getElementsByClass("gzdd").text(); String gxsj = e.getElementsByClass("gxsj").text(); map.put("职位名称", zwmc); map.put("公司名称", gsmc); map.put("职位月薪", zwyx); map.put("工作地点", gzdd); map.put("发布日期", gxsj); list.add(map); } return list; } public static void main(String[] args) throws IOException { Workbook wb = new HSSFWorkbook(); FileOutputStream fileOut = new FileOutputStream("E:\\fiveMonth\\poi\\txts2.xls"); Sheet sheet = wb.createSheet("java"); List<HashMap<String, String>> resultList = analyzeHtml("http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E5%8C%97%E4%BA%AC&kw=java&p=1&isadv=0", "UTF-8"); for (int i = 0; i < resultList.size(); i++) { Row row = sheet.createRow(i); row.createCell(0).setCellValue(resultList.get(i).get("职位名称")); row.createCell(1).setCellValue(resultList.get(i).get("公司名称")); row.createCell(2).setCellValue(resultList.get(i).get("职位月薪")); row.createCell(3).setCellValue(resultList.get(i).get("工作地点")); row.createCell(4).setCellValue(resultList.get(i).get("发布日期")); } wb.write(fileOut); fileOut.close(); } }