对爬取到的 JSON 进行解析,提取出我们需要的数据

{"data":{"start":0,"pageSize":10,"totalCount":50,"results":[{"name":"四豹电机科技(上海)有限公司","idCardOrOrgCode":"310118003045876","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":3,"otherCount":0,"time":null,"encryStr":"fWlvQ2htaDJBcnk=\n","entStatus":1},{"name":"上海申风图文制作有限公司","idCardOrOrgCode":"310229000961333","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":1,"otherCount":0,"time":null,"encryStr":"W29wMG8wbUNxNg==\n","entStatus":1},{"name":"上海泰成科技发展有限公司","idCardOrOrgCode":"310229000380221","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":4,"otherCount":0,"time":null,"encryStr":"UWs2cX19dTRiZA==\n","entStatus":1},{"name":"上海朱港砼制品有限公司","idCardOrOrgCode":"310118002437791","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":21,"otherCount":0,"time":null,"encryStr":"OWt9aGJrMktxfQ==\n","entStatus":1},{"name":"上海传志快物流有限公司","idCardOrOrgCode":"310226000666523","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":3,"otherCount":0,"time":null,"encryStr":"NkE5e2p0MVExaA==\n","entStatus":1},{"name":"上海元真工贸有限公司","idCardOrOrgCode":"3102292095870","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":2,"otherCount":0,"time":null,"encryStr":"fUs5OUN1cGg0ZnQ=\n","entStatus":2},{"name":"武宁县金鑫汽车运输有限公司","idCardOrOrgCode":"360423210001796","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":1,"otherCount":0,"time":null,"encryStr":"UTJqeTlDZDIxeQ==\n","entStatus":1},{"name":"平顶山市顺风汽车运输有限公司","idCardOrOrgCode":"410491000008312","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":1,"otherCount":0,"time":null,"encryStr":"cXt7NnB6aGd0MHo=\n","entStatus":1},{"name":"江苏首义置业有限公司","idCardOrOrgCode":"321324000033480","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":100,"otherCount":0,"time":null,"encryStr":"enpmUWVlZWh3MTY=\n","entStatus":1},{"name":"宿迁市建设工程(集团)有限公司","idCardOrOrgCode":"321300000006608","objectType":2,"goodCount":0,"badCount":0,"dishonestyCount":90,"otherCount":0,"time":null,"encryStr":"Mn1lcGowdGN7MFE=\n","
entStatus":1}],"page":1,"currentPageNo":1,"totalPageCount":5}}

上面是获得的json数据

解析时需要注意:我们需要的是 "results" 之后、"page" 之前的那段数组数据

本文使用 fastjson 进行解析:Crawler 是实体类,doc 是包含 JSON 文本的 Jsoup Document,方法返回 List<Crawler> 列表

 //解析json
    /**
     * Captures the JSON array located between {@code results":} and {@code ,"page"}.
     * Compiled once instead of on every call; DOTALL lets the capture span line breaks.
     */
    private static final Pattern RESULTS_ARRAY_PATTERN =
            Pattern.compile("results\":(.*?)\\,\"page\"", Pattern.DOTALL);

    /**
     * Extracts the {@code results} array from the JSON carried by {@code doc} and converts
     * it into {@link Crawler} objects.
     *
     * <p>Each returned object is a fresh {@code Crawler} holding only the name,
     * idCardOrOrgCode, goodCount, badCount, dishonestyCount and encryStr of the parsed
     * entry; other properties of the parsed entry are not copied.
     *
     * <p>Exceptions thrown by {@code JSON.parseArray} are not caught here.
     *
     * @param doc document whose serialized content contains the JSON response
     * @return the extracted entries; an empty list when no results array is found or the
     *         array parses to nothing (never {@code null})
     */
    public static List<Crawler> getJsonData(Document doc) {
        List<Crawler> crawlerJsonData = new ArrayList<Crawler>();

        // NOTE(review): html() returns serialized markup, so characters such as '&' inside
        // the JSON may come back entity-escaped - confirm, and consider reading the raw
        // response body instead of going through a Document.
        String html = doc.html();

        // When the pattern matches more than once, the last match is the one that is used.
        Matcher resultsMatcher = RESULTS_ARRAY_PATTERN.matcher(html);
        String resultsJson = "";
        while (resultsMatcher.find()) {
            resultsJson = resultsMatcher.group(1);
        }
        if (resultsJson.isEmpty()) {
            return crawlerJsonData;
        }

        // Guard against a null list (presumably what fastjson yields for a literal
        // "null" value - verify) so the loop below cannot throw a NullPointerException.
        List<Crawler> parsedEntries = JSON.parseArray(resultsJson, Crawler.class);
        if (parsedEntries == null) {
            return crawlerJsonData;
        }

        for (Crawler parsed : parsedEntries) {
            if (parsed == null) {
                // Skip null array elements instead of failing on the getter calls.
                continue;
            }
            Crawler crawlerModel = new Crawler();
            crawlerModel.setName(parsed.getName());
            crawlerModel.setIdCardOrOrgCode(parsed.getIdCardOrOrgCode());
            crawlerModel.setGoodCount(parsed.getGoodCount());
            crawlerModel.setBadCount(parsed.getBadCount());
            crawlerModel.setDishonestyCount(parsed.getDishonestyCount());
            crawlerModel.setEncryStr(parsed.getEncryStr());
            crawlerJsonData.add(crawlerModel);
        }

        return crawlerJsonData;
    }

调用方式如下:

String Starturl = "http://www.creditchina.gov.cn/credit_info_search?keyword=&searchtype=0&objectType=2&areas=&creditType=8&dataType=1&areaCode=&templateId=1&exact=0&page=1";
            Document doc = Jsoup.connect(Starturl).userAgent("bbb").timeout(120000).ignoreContentType(true).get();

            List<Crawler> crawlers = new ArrayList<Crawler>();
            crawlers = getJsonData(doc);



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值