import jxl.Sheet; import jxl.Workbook; import jxl.read.biff.BiffException; import org.apache.http.HttpEntity; import org.apache.http.HttpStatus; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.utils.HttpClientUtils; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.jsoup.Jsoup; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; public class HttpClientTest { public static void main(String[] args) { //解析Excel try { Workbook workbook = Workbook.getWorkbook(new File("E://beijing.xls")); Sheet sheet = workbook.getSheet(0); //单条:http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/11/01/01/110101001.html System.out.println("excel总行数:"+sheet.getRows()); String [][] values = new String[8000][3]; int size = 0; for (int i = 0; i<sheet.getRows(); i++){ System.out.println(sheet.getCell(0,i).getContents()); int size1 = getHtml(sheet.getCell(0, i).getContents(), values, size); size += size1; } //System.out.println("111111111111"); getHSSFWorkbook("xingzhengquhua.xls",null,values,null); } catch (IOException e) { e.printStackTrace(); } catch (BiffException e) { e.printStackTrace(); } //------------------------------------------ //1.生成httpclient,相当于该打开一个浏览器 } public static int getHtml(String html1, String [][] values,int size){ CloseableHttpClient httpClient = HttpClients.createDefault(); CloseableHttpResponse response = null; //2.创建get请求,相当于在浏览器地址栏输入 网址 HttpGet request = new HttpGet(html1); try { //3.执行get请求,相当于在输入地址栏后敲回车键 response = 
httpClient.execute(request); //4.判断响应状态为200,进行处理 if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { //5.获取响应内容 HttpEntity httpEntity = response.getEntity(); String html = EntityUtils.toString(httpEntity, "gb2312"); System.out.println("获取内容:"+html); org.jsoup.nodes.Document parse = Jsoup.parse(html); org.jsoup.nodes.Element title = parse.getElementsByTag("title").first(); System.out.println(title); Elements elementsByClass = parse.getElementsByClass("villagetr"); System.out.println("获取数据:"+elementsByClass); int z=size; for (Element byClass : elementsByClass) { Elements td = byClass.getElementsByTag("td"); String[] strings = new String[3]; int k=0; for (Element element : td) { String text = element.text(); System.out.println("td数据:"+text); strings[k]=text; k++; } values[z]=strings; z++; } return elementsByClass.size(); } else { //如果返回状态不是200,比如404(页面不存在)等,根据情况做处理,这里略 System.out.println("返回状态不是200"); System.out.println(EntityUtils.toString(response.getEntity(), "utf-8")); } } catch (ClientProtocolException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { //6.关闭 HttpClientUtils.closeQuietly(response); HttpClientUtils.closeQuietly(httpClient); } return 0; } public static HSSFWorkbook getHSSFWorkbook(String sheetName,String []title,String [][]values, HSSFWorkbook wb){ if(wb == null){ wb = new HSSFWorkbook(); } HSSFSheet sheet = wb.createSheet(sheetName); sheet.createRow(0); HSSFCellStyle style = wb.createCellStyle(); style.setAlignment(HSSFCellStyle.ALIGN_CENTER); for(int i=0;i<values.length;i++){ HSSFRow row = sheet.createRow(i + 1); for(int j=0;j<values[i].length;j++){ row.createCell(j).setCellValue(values[i][j]); } } try { FileOutputStream fileOutputStream = new FileOutputStream("E:/beijingshi.xls"); wb.write(fileOutputStream); } catch (IOException e) { e.printStackTrace(); } return wb; } }
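/* 所需jar (required Maven dependencies). Note: the code also imports org.apache.http.*, so Apache HttpClient 4.3 or later (org.apache.httpcomponents:httpclient) is required in addition to the entries listed below.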
<dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.12.1</version> </dependency> <dependency> <groupId>net.sourceforge.jexcelapi</groupId> <artifactId>jxl</artifactId> <version>2.6.12</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.6</version> </dependency>
*/