{"data":{"start":0,"pageSize":10,"totalCount":50,"results":[{"name":"四豹电机科技(上海)有限公司","idCardOrOrgCode":"310118003045876","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":3,"otherCount":0,"time":null,"encryStr":"fWlvQ2htaDJBcnk=\n","entStatus":1},{"name":"上海申风图文制作有限公司","idCardOrOrgCode":"310229000961333","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":1,"otherCount":0,"time":null,"encryStr":"W29wMG8wbUNxNg==\n","entStatus":1},{"name":"上海泰成科技发展有限公司","idCardOrOrgCode":"310229000380221","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":4,"otherCount":0,"time":null,"encryStr":"UWs2cX19dTRiZA==\n","entStatus":1},{"name":"上海朱港砼制品有限公司","idCardOrOrgCode":"310118002437791","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":21,"otherCount":0,"time":null,"encryStr":"OWt9aGJrMktxfQ==\n","entStatus":1},{"name":"上海传志快物流有限公司","idCardOrOrgCode":"310226000666523","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":3,"otherCount":0,"time":null,"encryStr":"NkE5e2p0MVExaA==\n","entStatus":1},{"name":"上海元真工贸有限公司","idCardOrOrgCode":"3102292095870","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":2,"otherCount":0,"time":null,"encryStr":"fUs5OUN1cGg0ZnQ=\n","entStatus":2},{"name":"武宁县金鑫汽车运输有限公司","idCardOrOrgCode":"360423210001796","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":1,"otherCount":0,"time":null,"encryStr":"UTJqeTlDZDIxeQ==\n","entStatus":1},{"name":"平顶山市顺风汽车运输有限公司","idCardOrOrgCode":"410491000008312","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":1,"otherCount":0,"time":null,"encryStr":"cXt7NnB6aGd0MHo=\n","entStatus":1},{"name":"江苏首义置业有限公司","idCardOrOrgCode":"321324000033480","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":100,"otherCount":0,"time":null,"encryStr":"enpmUWVlZWh3MTY=\n","entStatus":1},{"name":"宿迁市建设工程(集团)有限公司","idCardOrOrgCode":"321300000006608","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":90,"otherCount":0,"time":null,"encryStr":"Mn1lcGowdGN7MFE=\n","entStatus":1}],"page":1,"currentPageNo":1,"totalPageCount":5}}
上面是获得的json数据
解析时需要注意:我们真正需要的是 "results" 键对应的数组,也就是位于 "results": 之后、,"page" 之前的那一段数据。
本文主要用 fastJson 解析:Crawler 是实体类,doc 是 Jsoup 请求返回的 Document(其内容就是上面的 JSON 文本),方法返回的是 List<Crawler> 列表。
/**
 * Matches the text between {@code results":} and {@code ,"page"}, i.e. the
 * JSON array of result records. DOTALL lets the capture span line breaks
 * that may appear in the serialized HTML.
 */
private static final Pattern RESULTS_PATTERN =
        Pattern.compile("results\":(.*?)\\,\"page\"", Pattern.DOTALL);

/**
 * Extracts the {@code results} array from the JSON payload carried by
 * {@code doc} and converts each element into a {@link Crawler}.
 *
 * <p>Only name, idCardOrOrgCode, goodCount, badCount, dishonestyCount and
 * encryStr are copied into the returned objects; any other field of the
 * parsed records is intentionally left unset.
 *
 * <p>NOTE(review): the payload is read from {@code doc.html()}, which
 * re-serializes the document as HTML, so characters such as {@code &} may
 * come back entity-escaped in string values. Confirm whether callers need
 * them unescaped.
 *
 * @param doc document whose serialized HTML contains the JSON response;
 *            {@code null} yields an empty list
 * @return the parsed records, never {@code null}; empty when the payload
 *         has no {@code results} array or the array is {@code null}
 */
public static List<Crawler> getJsonData (Document doc) {
    List<Crawler> crawlerJsonData = new ArrayList<Crawler>();
    if (doc == null) {
        return crawlerJsonData;
    }

    // Serialized document text that contains the JSON to be parsed.
    String html = doc.html();

    // Keep the last match, as the original loop did.
    Matcher dataMatcher1 = RESULTS_PATTERN.matcher(html);
    String da1 = "";
    while (dataMatcher1.find()) {
        da1 = dataMatcher1.group(1);
    }
    if (da1.length() == 0) {
        return crawlerJsonData;
    }

    // parseArray returns null for a literal "null" payload; guard the loop.
    List<Crawler> jsonmodel1 = JSON.parseArray(da1, Crawler.class);
    if (jsonmodel1 == null) {
        return crawlerJsonData;
    }

    for (Crawler jso : jsonmodel1) {
        if (jso == null) {
            // A null array element carries no data to copy.
            continue;
        }
        Crawler crawlerModel = new Crawler();
        crawlerModel.setName(jso.getName());
        crawlerModel.setIdCardOrOrgCode(jso.getIdCardOrOrgCode());
        crawlerModel.setGoodCount(jso.getGoodCount());
        crawlerModel.setBadCount(jso.getBadCount());
        crawlerModel.setDishonestyCount(jso.getDishonestyCount());
        crawlerModel.setEncryStr(jso.getEncryStr());
        crawlerJsonData.add(crawlerModel);
    }
    return crawlerJsonData;
}
在下面进行调用
String Starturl = "http://www.creditchina.gov.cn/credit_info_search?keyword=&searchtype=0&objectType=2&areas=&creditType=8&dataType=1&areaCode=&templateId=1&exact=0&page=1";
Document doc = Jsoup.connect(Starturl).userAgent("bbb").timeout(120000).ignoreContentType(true).get();
List<Crawler> crawlers = new ArrayList<Crawler>();
crawlers = getJsonData(doc);