Java 爬虫：爬取基金的增长情况
先上 pom 文件中的依赖配置：
<!-- Maven dependencies for the crawler example. -->
<dependencies>
<!-- jsoup HTML parser: the HelloWorld class imports org.jsoup.Jsoup, Document and Elements. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
<!-- NOTE(review): sigar is not imported by the HelloWorld class shown in this file; confirm whether it is still needed. -->
<dependency>
<groupId>org.fusesource</groupId>
<artifactId>sigar</artifactId>
<version>1.6.4</version>
</dependency>
<!-- Apache HttpClient: the HelloWorld class imports its HttpGet, CloseableHttpClient, CloseableHttpResponse and EntityUtils types. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<!-- NOTE(review): htmlunit is not imported by the HelloWorld class shown in this file; confirm whether it is still needed. -->
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.23</version>
<!-- Excludes htmlunit's transitive httpclient; presumably so that the explicitly declared 4.5.2 above is the only copy on the classpath. -->
<exclusions>
<exclusion>
<artifactId>httpclient</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
代码
package com.test;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Small crawler that downloads one record per fund code and prints the
 * records to standard output.
 *
 * <p>For every code a GET request is sent to
 * {@code http://fundgz.1234567.com.cn/js/<code>.js}; the first
 * <code>{...}</code> object found in the response body is kept as that
 * fund's record. The payload is presumably a JavaScript/JSONP wrapper around
 * a single JSON object (verify against the live endpoint).
 */
public class HelloWorld {

    /** Browser-like user agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36";

    /**
     * Fetches the record of each configured fund code and prints the results,
     * first as a list and then one record per line.
     *
     * @param args unused
     * @throws Exception if a request fails at the transport level or the
     *                   response body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Fund codes to crawl.
        List<String> codes = new ArrayList<String>();
        codes.add("003834");
        codes.add("002251");
        codes.add("161725");
        codes.add("003095");

        List<String> result = new ArrayList<String>();

        // One client is shared by all requests and closed when the loop ends;
        // creating a client per request without closing it leaks its resources.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            for (String code : codes) {
                // The "rt" timestamp makes every URL unique (presumably to defeat caching).
                String url = "http://fundgz.1234567.com.cn/js/" + code + ".js?rt=" + System.currentTimeMillis();
                HttpGet httpGet = new HttpGet(url);
                httpGet.setHeader("user-agent", USER_AGENT);

                // try-with-resources releases the response even if reading the status line throws.
                try (CloseableHttpResponse response = httpclient.execute(httpGet)) {
                    int statusCode = response.getStatusLine().getStatusCode();
                    HttpEntity entity = response.getEntity();
                    if (statusCode != 200 || entity == null) {
                        // Drain whatever was sent so the connection can be reused; consume(null) is a no-op.
                        EntityUtils.consume(entity);
                        System.err.println("Skipping fund " + code + ": HTTP status " + statusCode);
                        continue;
                    }

                    // The payload is sliced directly rather than round-tripped through an
                    // HTML parser, whose re-serialisation would HTML-escape the text.
                    String body = EntityUtils.toString(entity, Consts.UTF_8);
                    String json = extractJsonObject(body);
                    if (json == null) {
                        System.err.println("Skipping fund " + code + ": no JSON object in response");
                        continue;
                    }
                    result.add(json);
                }
            }
        }

        System.out.println(result);
        for (String record : result) {
            System.out.println(record);
        }
    }

    /**
     * Returns the text from the first '{' up to and including the next '}'.
     *
     * @param body raw response text, may be {@code null}
     * @return the extracted object text, or {@code null} when {@code body} is
     *         {@code null} or contains no complete <code>{...}</code> pair
     */
    private static String extractJsonObject(String body) {
        if (body == null) {
            return null;
        }
        int start = body.indexOf('{');
        if (start < 0) {
            return null;
        }
        // Search from 'start' so a stray '}' before the object cannot produce an invalid range.
        int end = body.indexOf('}', start);
        if (end < 0) {
            return null;
        }
        return body.substring(start, end + 1);
    }
}