// jsoup爬虫数据,导出xls (crawl data with jsoup and export it to an .xls file)
package com.bootdo.testDemo;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.ss.usermodel.VerticalAlignment;
import org.apache.poi.ss.util.CellRangeAddress;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.client.RestTemplate;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.bootdo.common.utils.HttpUtil;
/**
 * Scrapes the paginated list served under {@link #BASE_URL} with jsoup and exports the
 * collected rows to an .xls workbook with Apache POI.
 *
 * <p>{@link #main(String[])} performs the crawl and hands the rows to
 * {@link #createExcel(List)}. Each row is a map keyed {@code "text0"}, {@code "text1"}, ...;
 * the workbook contains columns {@code text0} .. {@code text12}.
 *
 * <p>NOTE(review): the class carries Spring/JUnit annotations but declares no {@code @Test}
 * method or request mapping in this file; they are kept unchanged because other code may rely
 * on them — confirm whether they are still needed.
 */
@RestController()
@RunWith(SpringRunner.class)
@SpringBootTest
public class TestDemo {

    /** Common prefix of the list page, the detail pages and the link written to column 6. */
    private static final String BASE_URL = "http://zwfw-new.hunan.gov.cn/hnvirtualhall/hndirectory/";

    /** Location of the generated workbook. */
    private static final String OUTPUT_PATH = "d:\\home\\gworkbook1.xls";

    /** Number of columns written per row ({@code text0} .. {@code text12}). */
    private static final int COLUMN_COUNT = 13;

    /** Column whose value is a path relative to {@link #BASE_URL}. */
    private static final int LINK_COLUMN = 6;

    /** Number of list pages to crawl. */
    private static final int PAGE_COUNT = 77;

    /** Step of the {@code pager.offset} query parameter between two list pages. */
    private static final int PAGE_SIZE = 10;

    /** Number of leading {@code <td>} cells copied from each list row. */
    private static final int LIST_CELL_COUNT = 5;

    /**
     * Number of leading characters dropped from a row's {@code onclick} attribute to obtain the
     * relative detail path. Presumably this is the length of
     * {@code javascript:window.location.href=} (32 characters) — TODO confirm against the page.
     */
    private static final int ONCLICK_PREFIX_LENGTH = 32;

    /**
     * Writes the given rows to {@link #OUTPUT_PATH} as a single-sheet .xls workbook.
     *
     * <p>Row {@code i} of the sheet is built from {@code list.get(i)}; column {@code c} holds the
     * value stored under key {@code "text" + c}, except column 6, which holds {@link #BASE_URL}
     * followed by the {@code text6} value. A missing value produces a blank cell.
     *
     * <p>I/O failures are reported on standard error and do not propagate, as before.
     *
     * @param list rows to export, one map per spreadsheet row
     */
    public void createExcel(List<Map<String,String>> list){
        // try-with-resources closes both the workbook and the stream, even when writing fails.
        try (HSSFWorkbook wb = new HSSFWorkbook()) {
            HSSFSheet sheet = wb.createSheet("mySheet");
            sheet.setDefaultColumnWidth(20);

            // The style is registered in the workbook exactly as before but is not applied to
            // any cell (the code that applied it was commented out).
            // NOTE(review): either apply it to a header row or drop it.
            createHighlightStyle(wb);

            for (int rowIndex = 0; rowIndex < list.size(); rowIndex++) {
                Map<String,String> values = list.get(rowIndex);
                // One spreadsheet row per list entry.
                HSSFRow row = sheet.createRow(rowIndex);
                for (int column = 0; column < COLUMN_COUNT; column++) {
                    row.createCell(column).setCellValue(cellText(values, column));
                }
            }

            try (FileOutputStream output = new FileOutputStream(OUTPUT_PATH)) {
                wb.write(output);
            }
        } catch (IOException e) {
            System.err.println("Failed to write workbook to " + OUTPUT_PATH);
            e.printStackTrace();
        }
    }

    /**
     * Crawls {@link #PAGE_COUNT} list pages, follows each row's detail link and exports
     * everything that could be parsed via {@link #createExcel(List)}.
     *
     * <p>Pages or rows that do not have the expected structure are reported on standard error
     * and skipped, so one malformed page no longer discards the whole crawl.
     *
     * @param args unused
     * @throws IOException declared for compatibility; propagated if the HTTP helper throws it
     */
    public static void main(String[] args) throws IOException {
        List<Map<String,String>> list = new ArrayList<Map<String,String>>();
        for (int page = 0; page < PAGE_COUNT; page++) {
            int offset = page * PAGE_SIZE;
            String url = BASE_URL + "publiclist.jsp?jgname=&pager.offset=" + offset
                    + "&pager.sortKey=null&pager.desc=true";
            Element listTable = fetchFirstByClass(url, "sfqd_mid_table");
            if (listTable == null) {
                System.err.println("Skipping list page without 'sfqd_mid_table': " + url);
                continue;
            }
            for (Element postItem : listTable.getElementsByClass("tr0")) {
                Map<String,String> row = parseListRow(postItem);
                if (row != null) {
                    list.add(row);
                }
            }
        }
        new TestDemo().createExcel(list);
    }

    /** Builds the bold, red, centered, yellow-filled cell style and registers it in the workbook. */
    private static HSSFCellStyle createHighlightStyle(HSSFWorkbook wb) {
        Font font = wb.createFont();
        font.setBold(true);
        font.setFontName("黑体");
        font.setFontHeightInPoints((short) 11);
        font.setColor(Font.COLOR_RED);

        HSSFCellStyle style = wb.createCellStyle();
        style.setFont(font);
        style.setAlignment(HorizontalAlignment.CENTER);
        style.setVerticalAlignment(VerticalAlignment.CENTER);
        style.setFillForegroundColor(IndexedColors.YELLOW.getIndex());
        style.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        return style;
    }

    /** Returns the text for one cell, or {@code null} (blank cell) when the row has no value for it. */
    private static String cellText(Map<String,String> values, int column) {
        String value = values.get("text" + column);
        if (column == LINK_COLUMN && value != null) {
            return BASE_URL + value;
        }
        return value;
    }

    /** Fetches a page and returns its first element with the given class, or {@code null} if there is none. */
    private static Element fetchFirstByClass(String url, String className) throws IOException {
        String html = HttpUtil.doPostForTRS1(url, "UTF-8");
        if (html == null) {
            return null;
        }
        Elements matches = Jsoup.parse(html).getElementsByClass(className);
        return matches.isEmpty() ? null : matches.get(0);
    }

    /** Converts one list row, plus the detail page it links to, into a row map; returns {@code null} if the row is malformed. */
    private static Map<String,String> parseListRow(Element postItem) throws IOException {
        Elements cells = postItem.select("td");
        if (cells.size() < LIST_CELL_COUNT) {
            System.err.println("Skipping row with only " + cells.size() + " cells");
            return null;
        }
        String onclick = postItem.select("tr").attr("onclick");
        if (onclick.length() < ONCLICK_PREFIX_LENGTH) {
            System.err.println("Skipping row with unexpected onclick: '" + onclick + "'");
            return null;
        }

        Map<String,String> map = new HashMap<String, String>();
        for (int i = 0; i < LIST_CELL_COUNT; i++) {
            map.put("text" + i, cells.get(i).text());
        }
        map.put("text5", "");

        // Drop the fixed-length prefix and the quotes around the relative detail path.
        String detailPath = onclick.substring(ONCLICK_PREFIX_LENGTH).replace("'", "");
        map.put("text6", detailPath);

        String detailUrl = BASE_URL + detailPath;
        Element detailTable = fetchFirstByClass(detailUrl, "qd_xq_table");
        if (detailTable == null) {
            // Keep what the list page provided rather than losing the row.
            System.err.println("Detail page without 'qd_xq_table': " + detailUrl);
            return map;
        }

        // Detail values are stored under text7, text8, ... in document order.
        int key = LINK_COLUMN;
        for (Element detailCell : detailTable.getElementsByClass("t2")) {
            key++;
            map.put("text" + key, detailCell.text());
        }
        return map;
    }
}