package com.cjl.industrial.application.bottle.controller; import org.apache.http.HttpEntity; import org.apache.http.ParseException; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFRun; import org.jsoup.Jsoup; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.*; public class test { public static void main(String[] args) throws IOException { FileInputStream fileInputStream = new FileInputStream("D:\\123.docx"); // 这里是你要写入的文件 XWPFDocument xdoc = new XWPFDocument(fileInputStream); // 创建一个段落 XWPFParagraph xpara = xdoc.createParagraph(); // 一个XWPFRun代表具有相同属性的一个区域。 XWPFRun run = xpara.createRun(); // 获取连接客户端工具 CloseableHttpClient httpClient = HttpClients.createDefault(); String entityStr = null; CloseableHttpResponse response = null; try { // 创建POST请求对象 for(int b=1;b<=25;b++){ String url ="http://www.renyiwang.net/Student/PracticePreview.asp?o_id=6&q_id=2501&questions=&pn="+b; HttpPost httpPost = new HttpPost(url); //登录信息 //httpPost.addHeader("Cookie", ""); response = httpClient.execute(httpPost); // 获得响应的实体对象 HttpEntity entity = response.getEntity(); // 使用Apache提供的工具类进行转换成字符串 entityStr = EntityUtils.toString(entity, "UTF-8"); org.jsoup.nodes.Document document = Jsoup.parse(entityStr); Elements tr = document.getElementsByTag("tr"); for(int a=0;a<tr.size();a++){ Element element = tr.get(a); String text = element.text(); run.setText(text+"\r"); run = xpara.createRun(); } } OutputStream os = new FileOutputStream("D://345.docx"); xdoc.write(os); os.close(); } catch (ClientProtocolException e) { System.err.println("Http协议出现问题"); e.printStackTrace(); 
} catch (ParseException e) { System.err.println("解析错误"); e.printStackTrace(); } catch (IOException e) { System.err.println("IO异常"); e.printStackTrace(); } finally { // 释放连接 if (null != response) { try { response.close(); httpClient.close(); } catch (IOException e) { System.err.println("释放连接出错"); e.printStackTrace(); } } } // 打印响应内容 System.out.println(entityStr); } }
对通过接口获取的 HTML 文件进行解析
最新推荐文章于 2023-12-15 17:54:55 发布