1、注册个人微信公众号
2、登录微信公众平台 => 素材管理 => 新建图文模板 => 点击上方“超链接” => 选择“其他公众号”(这里输入需要获取文章的公众号名称)。点击查询之前,先打开浏览器的开发者工具(F12)的网络面板,再点击查询,从抓到的请求中记录请求的url、请求参数,以及请求头中的cookie和user-agent(步骤3会用到)
参考下列图:
3、使用步骤2中的url(带上步骤2中的cookie和user-agent请求头)发起GET请求,获得该公众号下的文章地址列表(每篇文章的地址在返回结果的link字段中)
/**
 * Sends the GET request captured in step 2 and prints the response status
 * line and body to standard output.
 *
 * <p>The URL, {@code cookie} and {@code user-agent} values are placeholders
 * that must be replaced with the values recorded from the browser in step 2.
 *
 * <p>Both the HTTP client and the response are closed automatically (response
 * first, then client). Any I/O or parse failure, including a failure while
 * closing, is printed to standard error and does not propagate.
 */
@Test
public void get(){
    HttpGet httpGet = new HttpGet("步骤2中的url");
    // Request headers recorded in step 2.
    httpGet.setHeader("cookie", "步骤2中cookie");
    httpGet.setHeader("user-agent", "步骤2中user-agent");

    RequestConfig requestConfig = RequestConfig.custom()
            // Timeout (ms) for establishing the connection.
            .setConnectTimeout(5000)
            // Timeout (ms) for obtaining a connection from the connection manager.
            .setConnectionRequestTimeout(5000)
            // Socket read timeout (ms): maximum inactivity between data packets.
            .setSocketTimeout(5000)
            // Follow redirects automatically.
            .setRedirectsEnabled(true)
            .build();
    httpGet.setConfig(requestConfig);

    // try-with-resources closes the response before the client and routes any
    // IOException thrown by close() into the catch block below.
    try (CloseableHttpClient httpClient = HttpClientBuilder.create().build();
         CloseableHttpResponse response = httpClient.execute(httpGet)) {
        HttpEntity responseEntity = response.getEntity();
        System.out.println("响应状态为:" + response.getStatusLine());
        // Use UTF-8 when the response does not declare a charset; the
        // single-argument overload would fall back to ISO-8859-1 and garble
        // Chinese text.
        System.out.println("响应内容为:" + EntityUtils.toString(responseEntity, "UTF-8"));
    } catch (IOException | ParseException e) {
        // ClientProtocolException is a subclass of IOException, so it is covered here.
        e.printStackTrace();
    }
}
4、请求步骤3中获取的文章地址url,使用jsoup解析返回的html文件得到文章内容
<!-- jsoup: HTML parser used in step 4 to parse the fetched article page and extract its content -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
/**
 * Downloads one article page and prints the text found inside the elements
 * whose CSS class is {@code rich_media_content}.
 *
 * <p>The URL is a placeholder for a {@code link} value taken from the list
 * returned in step 3.
 *
 * @throws Exception if fetching or parsing the page fails
 */
@Test
public void parseHtml() throws Exception {
    final String articleUrl = "步骤3中返回列表的Url(link字段)";
    // Fetched the same way as in step 3, but no request headers are needed.
    final Document page = Jsoup.parse(get(articleUrl));
    // Select the article body by the class attribute of its container element.
    final Elements articleBody = page.getElementsByClass("rich_media_content");
    final String articleText = articleBody.text();
    System.out.println("解析的html内容为" + articleText);
}