- 网站地址
- 请求地址
http://q.10jqka.com.cn/index/index/board/all/field/zdf/order/desc/page/4/ajax/1/
其中请求地址路径里的 page/4 表示页码(此处为第 4 页),修改该数字即可请求其他页。
- 访问网站 抓取每一页的股票数据
/**
 * Fetches the given URL with a set of browser-like request headers and
 * returns the response body as text.
 *
 * @param url absolute URL of the page to fetch
 * @return the response content as a string, or {@code null} if the URL is
 *         malformed or the request fails for any reason
 */
private String crawler(String url) {
    try {
        WebRequest request = new WebRequest(new URL(url), HttpMethod.GET);

        // Request headers sent along with the GET.
        Map<String, String> additionalHeaders = new HashMap<>();
        additionalHeaders.put("User-Agent",
                "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36");
        additionalHeaders.put("Accept-Language", "zh-CN,zh;q=0.8");
        additionalHeaders.put("Accept", "*/*");
        request.setAdditionalHeaders(additionalHeaders);

        // Pass the configured request (not the bare URL) so the headers
        // above are actually applied to the outgoing call.
        Page page = webClient.getPage(request);

        // Convert the response to a String and return it.
        return page.getWebResponse().getContentAsString();
    } catch (Exception e) {
        // Best-effort fetch: report the failure and fall through to null.
        e.printStackTrace();
    }
    return null;
}
- 保存抓到的网页数据
private void work(String url) {
try {
// 打开网页
webClient.getPage(url);
int pageCount = 5;
for (int i = 1