第一步:创建 Maven 项目
第二步:在 pom.xml 中导入 jsoup、POI(Excel)和 Spring Boot 的依赖
<!-- Versions of Spring Boot artifacts that omit <version> are managed by this parent. -->
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.0.3.RELEASE</version>
</parent>

<dependencies>
    <!-- HTML fetching and parsing: https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>

    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.16.10</version>
    </dependency>

    <!-- Version is inherited from spring-boot-starter-parent. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
    </dependency>

    <!-- Apache POI dependencies for Excel support -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>3.10-FINAL</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>3.10-FINAL</version>
    </dependency>
</dependencies>
第三步:在单元测试类中编写两个方法,代码如下:
public class TestReptilian { //#root > div > main > div > div > div > div.SignFlowHeader @Test public void test() throws IOException { // Document document = Jsoup.connect("https://www.zhihu.com/explore/recommendations").userAgent("Mozilla").get(); Element main = document.getElementById("zh-recommend-list-full"); Elements url = main.select("div").select("div:nth-child(2)") .select("h2").select("a[class=question_link]"); for (Element question : url) { //输出href后的值,即主页上每个关注问题的链接 String URL = question.attr("abs:href"); //下载问题链接指向的页面 Document document2 = Jsoup.connect(URL) .userAgent("Mozilla") .get(); //问题 #root > div > main > div > div:nth-child(11) > div.QuestionHeader > div.QuestionHeader-content > div.QuestionHeader-main > h1 Elements title = document2.select("#root") .select("div") .select("main") .select("div") .select("div:nth-child(11)") .select("div.QuestionHeader") .select("div.QuestionHeader-content") .select("div.QuestionHeader-main") .select("h1"); //问题描述 #root > div > main > div > div:nth-child(11) > div.QuestionHeader > div.QuestionHeader-content > div.QuestionHeader-main > div:nth-child(3) > div > div > div > span Elements detail = document2.select("#root") .select("div") .select("main") .select("div") .select("div:nth-child(11)") .select("div.QuestionHeader") .select("div.QuestionHeader-content") .select("div.QuestionHeader-main") .select("div:nth-child(3)") .select("div") .select("div") .select("div") .select("span"); //回答 Elements answer = document2.select("#root") .select("div") .select("main") .select("div") .select("div.Question-main") .select("div.Question-mainColumn") .select("div.Card.AnswerCard") .select("div") .select("div") .select("div.RichContent.RichContent--unescapable") .select("div.RichContent-inner") .select("span"); System.out.println("\n" + "链接:" + URL + "\n" + "标题:" + title.text() + "\n" + "问题描述:" + detail.text() + "\n" + "回答:" + answer.text()); writeExcel(URL,title.text(),detail.text(),answer.text()); } } public void 
writeExcel(String url, String title, String detail, String answer) { Workbook workbook = new XSSFWorkbook(); Sheet sheet = workbook.createSheet("0"); Row row = sheet.createRow(0); CellStyle cellStyle = workbook.createCellStyle(); // 设置这些样式 cellStyle.setFillForegroundColor(HSSFColor.SKY_BLUE.index); cellStyle.setFillPattern(CellStyle.SOLID_FOREGROUND); cellStyle.setBorderBottom(CellStyle.BORDER_THIN); cellStyle.setBorderLeft(CellStyle.BORDER_THIN); cellStyle.setBorderRight(CellStyle.BORDER_THIN); cellStyle.setBorderTop(CellStyle.BORDER_THIN); cellStyle.setAlignment(CellStyle.ALIGN_CENTER); row.createCell(0).setCellStyle(cellStyle); row.createCell(0).setCellValue(url); row.createCell(1).setCellStyle(cellStyle); row.createCell(1).setCellValue(title); row.createCell(2).setCellStyle(cellStyle); row.createCell(2).setCellValue(detail); row.createCell(3).setCellStyle(cellStyle); row.createCell(3).setCellValue(answer); workbook.setSheetName(0, "信息"); try { File file = new File("D:/crub/zhihu.xlsx"); FileOutputStream fileoutputStream = new FileOutputStream(file); workbook.write(fileoutputStream); fileoutputStream.close(); } catch (IOException e) { e.printStackTrace(); } } }