企业信息查询爬虫-Java

package com.hyy2;


import java.io.IOException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


public class HttpLogin {
    private static String url = "https://www.qichacha.com/";
    public static String search(String key) {
        try {
            HttpClient httpClient = new HttpClient();
            System.out.println("模拟登录成功");
            // 进行登陆后的操作
            String dataUrl = "https://www.qichacha.com/search?key="+key;
            GetMethod getMethod = new GetMethod(dataUrl);
            // 每次访问需授权的网址时需带上前面的 cookie 作为通行证
            // 以下为本地登录后在浏览器内缓存的cookie值 *****每个人的都不一样
            String s = "CNZZDATA1254842228=本地保存自己找;";
            String a = "PHPSESSID=本地保存自己找;";
            String b = "_uab_collina=本地保存自己找;";
            String c = "_umdata=本地保存自己找;";
            String d = "acw_tc=本地保存自己找;";
            String e = "hasShow=本地保存自己找";
            getMethod.setRequestHeader("cookie", s + a + b + c + d + e);
            getMethod.setRequestHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36");
            httpClient.executeMethod(getMethod);
            // 返回值接收
            String text = getMethod.getResponseBodyAsString();
            Document doc = Jsoup.parse(text);
            // 页面属性选择 通过key值取到select的内容
            Elements links = doc.select("tbody>tr>td>a");
            // 选取第一个元素就是要访问的公司信息
            String href = links.get(0).attr("href");
            System.out.println(url + href);
            // 进入页面
            return url + href;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        
    }
    public static void enter(String url){

        //为了方便导出创建了javabean

        Business bus=new Business();

        try {
            HttpClient httpClient = new HttpClient();
            // 进行登陆后的操作
            GetMethod getMethod = new GetMethod(url);
            // 每次访问需授权的网址时需带上前面的 cookie 作为通行证
            // 以下为本地登录后在浏览器内缓存的cookie值 *****每个人的都不一样
            String s = "CNZZDATA1254842228=;";
            String a = "PHPSESSID=;";
            String b = "_uab_collina=;";
            String c = "_umdata=;";
            String d = "acw_tc=;";
            String e = "hasShow=";
            getMethod.setRequestHeader("cookie", s + a + b + c + d + e);
            getMethod.setRequestHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36");
            httpClient.executeMethod(getMethod);
            // 返回值接收
            String text = getMethod.getResponseBodyAsString();
            Document doc = Jsoup.parse(text);
            // 页面属性选择 通过key值取到select的内容
            Elements links = doc.select("div#base_div>section#Cominfo>table.ntable>tbody>tr>td.ma_left>div.boss-td>div.clearfix>div.pull-left>a.bname");
          //法人姓名
            String name=links.get(0).text();
            bus.setLegalPerson(name);
            System.out.println(name);
            //获取其他信息   得到两个tbody  分析得出需要第二个
            Element links2=doc.select("div#base_div>section#Cominfo>table.ntable>tbody").last();
            //进行选择
            Elements elements2 = links2.select("tr>td");
            bus.setRegisteredCapital(elements2.get(1).text());
            bus.setPaidInCapital(elements2.get(3).text());
            bus.setBusinessStatus(elements2.get(5).text());
            bus.setDateOfEstablishment(elements2.get(7).text());
            bus.setID(elements2.get(9).text());
            bus.setCode(elements2.get(11).text());
            bus.setRegistrationNumber(elements2.get(13).text());
            bus.setOrganizationCode(elements2.get(15).text());
            bus.setCompanyType(elements2.get(17).text());
            bus.setIndustry(elements2.get(19).text());
            bus.setApprovalDate(elements2.get(21).text());
            bus.setRegistrationAuthority(elements2.get(23).text());
            bus.setDistrictBelong(elements2.get(25).text());
            bus.setEnglishName(elements2.get(27).text());
            bus.setUsedName(elements2.get(29).text());
            bus.setType(elements2.get(31).text());
            bus.setStaffSize(elements2.get(33).text());
            bus.setOperatingPeriod(elements2.get(35).text());
            bus.setAddress(elements2.get(37).text().replaceAll("查看地图", "").replaceAll("经营范围", ""));
            bus.setBusinessScope(elements2.get(39).text());
            System.out.println(bus);
//            for (Element element : elements2) {
//                String str = element.text();
//                System.out.println(str);
//            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    public static void main(String[] args) {
        //传入社会统一信用代码或者公司全称
        String url=search("");
        
        enter(url);
    }

}

package com.hyy2;


/**
 * Plain bean holding the registration details of one company.
 *
 * <p>All properties are strings, default to {@code null}, and are exposed through
 * ordinary getters and setters. {@link #toString()} renders every property as a
 * single line of {@code label:value} pairs separated by semicolons.
 */
public class Business {

    // ---- identity and status ----

    /** Legal representative. */
    private String LegalPerson;
    /** Registered capital. */
    private String RegisteredCapital;
    /** Paid-in capital. */
    private String PaidInCapital;
    /** Business status. */
    private String BusinessStatus;
    /** Date of establishment. */
    private String DateOfEstablishment;

    // ---- registration codes ----

    /** Unified social credit code. */
    private String ID;
    /** Taxpayer identification number. */
    private String Code;
    /** Registration number. */
    private String RegistrationNumber;
    /** Organization code. */
    private String OrganizationCode;

    // ---- classification ----

    /** Company type. */
    private String companyType;
    /** Industry. */
    private String Industry;
    /** Approval date. */
    private String ApprovalDate;
    /** Registration authority. */
    private String RegistrationAuthority;
    /** District the company belongs to. */
    private String districtBelong;

    // ---- names and operations ----

    /** English name. */
    private String EnglishName;
    /** Former name. */
    private String usedName;
    /** Mode of operation. */
    private String type;
    /** Staff size. */
    private String StaffSize;
    /** Operating period. */
    private String OperatingPeriod;
    /** Company address. */
    private String address;
    /** Business scope. */
    private String BusinessScope;

    /** @return the legal representative */
    public String getLegalPerson() {
        return this.LegalPerson;
    }

    /** @param legalPerson the legal representative */
    public void setLegalPerson(String legalPerson) {
        this.LegalPerson = legalPerson;
    }

    /** @return the registered capital */
    public String getRegisteredCapital() {
        return this.RegisteredCapital;
    }

    /** @param registeredCapital the registered capital */
    public void setRegisteredCapital(String registeredCapital) {
        this.RegisteredCapital = registeredCapital;
    }

    /** @return the paid-in capital */
    public String getPaidInCapital() {
        return this.PaidInCapital;
    }

    /** @param paidInCapital the paid-in capital */
    public void setPaidInCapital(String paidInCapital) {
        this.PaidInCapital = paidInCapital;
    }

    /** @return the business status */
    public String getBusinessStatus() {
        return this.BusinessStatus;
    }

    /** @param businessStatus the business status */
    public void setBusinessStatus(String businessStatus) {
        this.BusinessStatus = businessStatus;
    }

    /** @return the date of establishment */
    public String getDateOfEstablishment() {
        return this.DateOfEstablishment;
    }

    /** @param dateOfEstablishment the date of establishment */
    public void setDateOfEstablishment(String dateOfEstablishment) {
        this.DateOfEstablishment = dateOfEstablishment;
    }

    /** @return the unified social credit code */
    public String getID() {
        return this.ID;
    }

    /** @param iD the unified social credit code */
    public void setID(String iD) {
        this.ID = iD;
    }

    /** @return the taxpayer identification number */
    public String getCode() {
        return this.Code;
    }

    /** @param code the taxpayer identification number */
    public void setCode(String code) {
        this.Code = code;
    }

    /** @return the registration number */
    public String getRegistrationNumber() {
        return this.RegistrationNumber;
    }

    /** @param registrationNumber the registration number */
    public void setRegistrationNumber(String registrationNumber) {
        this.RegistrationNumber = registrationNumber;
    }

    /** @return the organization code */
    public String getOrganizationCode() {
        return this.OrganizationCode;
    }

    /** @param organizationCode the organization code */
    public void setOrganizationCode(String organizationCode) {
        this.OrganizationCode = organizationCode;
    }

    /** @return the company type */
    public String getCompanyType() {
        return this.companyType;
    }

    /** @param companyType the company type */
    public void setCompanyType(String companyType) {
        this.companyType = companyType;
    }

    /** @return the industry */
    public String getIndustry() {
        return this.Industry;
    }

    /** @param industry the industry */
    public void setIndustry(String industry) {
        this.Industry = industry;
    }

    /** @return the approval date */
    public String getApprovalDate() {
        return this.ApprovalDate;
    }

    /** @param approvalDate the approval date */
    public void setApprovalDate(String approvalDate) {
        this.ApprovalDate = approvalDate;
    }

    /** @return the registration authority */
    public String getRegistrationAuthority() {
        return this.RegistrationAuthority;
    }

    /** @param registrationAuthority the registration authority */
    public void setRegistrationAuthority(String registrationAuthority) {
        this.RegistrationAuthority = registrationAuthority;
    }

    /** @return the district the company belongs to */
    public String getDistrictBelong() {
        return this.districtBelong;
    }

    /** @param districtBelong the district the company belongs to */
    public void setDistrictBelong(String districtBelong) {
        this.districtBelong = districtBelong;
    }

    /** @return the English name */
    public String getEnglishName() {
        return this.EnglishName;
    }

    /** @param englishName the English name */
    public void setEnglishName(String englishName) {
        this.EnglishName = englishName;
    }

    /** @return the former name */
    public String getUsedName() {
        return this.usedName;
    }

    /** @param usedName the former name */
    public void setUsedName(String usedName) {
        this.usedName = usedName;
    }

    /** @return the mode of operation */
    public String getType() {
        return this.type;
    }

    /** @param type the mode of operation */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the staff size */
    public String getStaffSize() {
        return this.StaffSize;
    }

    /** @param staffSize the staff size */
    public void setStaffSize(String staffSize) {
        this.StaffSize = staffSize;
    }

    /** @return the operating period */
    public String getOperatingPeriod() {
        return this.OperatingPeriod;
    }

    /** @param operatingPeriod the operating period */
    public void setOperatingPeriod(String operatingPeriod) {
        this.OperatingPeriod = operatingPeriod;
    }

    /** @return the company address */
    public String getAddress() {
        return this.address;
    }

    /** @param address the company address */
    public void setAddress(String address) {
        this.address = address;
    }

    /** @return the business scope */
    public String getBusinessScope() {
        return this.BusinessScope;
    }

    /** @param businessScope the business scope */
    public void setBusinessScope(String businessScope) {
        this.BusinessScope = businessScope;
    }

    /**
     * Renders all properties on one line as {@code label:value} pairs joined by
     * semicolons, in a fixed order; unset properties appear as {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("法人:").append(this.LegalPerson);
        text.append(";注册资本:").append(this.RegisteredCapital);
        text.append(";实缴资本:").append(this.PaidInCapital);
        text.append(";经营状态:").append(this.BusinessStatus);
        text.append(";成立日期:").append(this.DateOfEstablishment);
        text.append(";统一社会信用代码:").append(this.ID);
        text.append(";纳税人识别号:").append(this.Code);
        text.append(";注册号:").append(this.RegistrationNumber);
        text.append(";组织机构代码:").append(this.OrganizationCode);
        text.append(";公司类型:").append(this.companyType);
        text.append(";所属行业:").append(this.Industry);
        text.append(";核准日期:").append(this.ApprovalDate);
        text.append(";登记机关:").append(this.RegistrationAuthority);
        text.append(";所属地区:").append(this.districtBelong);
        text.append(";英文名:").append(this.EnglishName);
        text.append(";曾用名:").append(this.usedName);
        text.append(";经营方式:").append(this.type);
        text.append(";人员规模:").append(this.StaffSize);
        text.append(";营业期限:").append(this.OperatingPeriod);
        text.append(";企业地址:").append(this.address);
        text.append(";经营范围:").append(this.BusinessScope);
        return text.toString();
    }

}

请先自行用浏览器登录网站,登录后的 cookie 信息会保存在本地。如果需要批量查询多家企业且数量较大,建议在每次请求之间增加等待时间。

我自己用的是谷歌浏览器:在地址栏输入 chrome://settings/siteData 即可查看保存的 cookie,请自行找到对应站点的各项取值。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值