使用HttpClient爬取国内疫情数据

废话先不多说,看下代码:

使用的依赖(HttpClient,以及用于解析 JSON 的 fastjson):

		<!-- Apache HttpClient 4.x: supplies the HttpClient, HttpGet and EntityUtils classes the scraper imports -->
		<dependency>
			<groupId>org.apache.httpcomponents</groupId>
			<artifactId>httpclient</artifactId>
			<version>4.5.5</version>
		</dependency>

		<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
		<!-- fastjson: supplies JSONArray, used to parse the JSON text extracted from the page -->
		<dependency>
			<groupId>com.alibaba</groupId>
			<artifactId>fastjson</artifactId>
			<version>1.2.62</version>
		</dependency>


import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONArray;

/**
 * Scrapes China's epidemic statistics from the ncov.dxy.cn page using Apache HttpClient.
 *
 * <p>The page is fetched as HTML and the per-province data is pulled out of an inline
 * {@code window.getAreaStat = ...} script assignment with a regular expression, then parsed
 * as a JSON array.
 *
 * @author Yuer
 */
public class HttpClientChina {

    /** Page whose HTML carries the {@code window.getAreaStat} assignment. */
    private static final String DATA_URL =
            "https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0";

    /** Browser-like User-Agent sent with the request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0";

    /**
     * Captures the text between {@code window.getAreaStat = } and the first {@code }} that is
     * immediately followed by {@code catch}; the closing brace itself is not captured.
     * Compiled once because the pattern never changes between calls.
     */
    private static final Pattern AREA_STAT_PATTERN =
            Pattern.compile("window.getAreaStat = (.*?)\\}(?=catch)");

    /**
     * Runs the scraper once; the parsed array is printed by {@link #parseHtml1()}.
     *
     * @param args unused
     * @throws Exception if the request or response handling fails
     */
    public static void main(String[] args) throws Exception {
        parseHtml1();
    }

    /**
     * Downloads the page, extracts the {@code window.getAreaStat} payload and parses it.
     *
     * <p>The parsed array is also printed to standard output. To persist the data, iterate
     * over the returned array (one element per province/city entry).
     *
     * @return the parsed JSON array, or {@code null} when the server does not answer with
     *         HTTP 200, the response has no body, or the expected script fragment is not
     *         found in the page
     * @throws IOException if the HTTP exchange or reading the response body fails
     */
    public static JSONArray parseHtml1() throws IOException {
        HttpGet get = new HttpGet(DATA_URL);
        get.setHeader("User-Agent", USER_AGENT);

        // try-with-resources releases the connection and the client's pool even when
        // reading or parsing throws; the original never closed either of them.
        try (CloseableHttpClient client = HttpClientBuilder.create().build();
                CloseableHttpResponse response = client.execute(get)) {

            if (response.getStatusLine().getStatusCode() != 200) {
                System.out.println("请求失败");
                return null;
            }

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // A 200 without a body cannot be parsed; treat it like a failed request
                // instead of letting EntityUtils reject the null entity.
                System.out.println("请求失败");
                return null;
            }

            String html = EntityUtils.toString(entity, StandardCharsets.UTF_8);

            // The embedded data already looks like JSON, so it is cut out with a regex
            // rather than by parsing the HTML/JavaScript.
            Matcher matcher = AREA_STAT_PATTERN.matcher(html);
            if (!matcher.find()) {
                return null;
            }

            JSONArray array = JSONArray.parseArray(matcher.group(1));
            System.out.println(array);
            return array;
        }
    }
}

最开始使用 jsoup 和 socket 去获取,效果不太理想,后来决定改用 HttpClient。目前还有一个待解决的问题:把这个 JSON 数组的数据导出为 Excel 以便查看,弄了半天没有成功,留待以后改进。尝试导出 Excel 时使用的 jar:

		<!-- Apache POI (OOXML): listed for the JSON-to-Excel export that the author reports is not working yet -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>3.10-FINAL</version>
		</dependency>

		<!-- Commons IO: listed alongside POI; its exact role is not shown in the code here -->
		<dependency>
			<groupId>commons-io</groupId>
			<artifactId>commons-io</artifactId>
			<version>2.5</version>
		</dependency>
		</dependency>
发布了41 篇原创文章 · 获赞 58 · 访问量 3511
App 阅读领勋章
微信扫码 下载APP
阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 技术工厂 设计师: CSDN官方博客

分享到微信朋友圈

×

扫一扫,手机浏览