java爬取国家应急平台漏洞公告数据

Java 爬取 http 类型的网站比较容易实现,因为不需要建立证书通道,直接通过 HttpClient 访问链接获取页面源码即可得到相关数据;本文介绍通过导入证书的方式,实现 Java 爬取 https 网站的数据。

获取https网站证书的方式可以看我上一篇博客:https://blog.csdn.net/qq_36706878/article/details/102546563

这里直接附上源码和运行结果

import java.io.File;
import java.io.IOException;

import javax.net.ssl.SSLContext;

import org.apache.http.*;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.apache.commons.logging.*;

public class HttpsReptile {
	// Base domain of the National Internet Emergency Center (CNCERT);
	// relative "next page" hrefs from the listing are resolved against it.
	public static String mainUrl = "https://www.cert.org.cn";

	// Keystore holding the site's HTTPS certificate chain. File and password
	// must match the keystore exported for the site (see the companion post).
	private static final String KEYSTORE_PATH = "E:/key.keystore";
	private static final String KEYSTORE_PASSWORD = "123456";

	/**
	 * Entry point of the crawl: fetches page 1 of the vulnerability bulletin
	 * list, processes it, then walks every following page via the
	 * "next page" link.
	 */
	public static void httpsReptile(){
		// Vulnerability bulletin listing page (page 1).
		String leakUrl = "https://www.cert.org.cn/publish/main/9/index.html";
		Document leakListDoc = sendHttp(leakUrl);
		int page = 1;
		System.out.println("处理第"+page+"页数据");
		solvePageData(leakListDoc);
		getPageCode(leakListDoc, page);
	}

	/**
	 * Follows the "下一页" (next page) link page by page, processing each
	 * newly fetched page with {@link #solvePageData(Document)}.
	 *
	 * Implemented iteratively instead of recursively so a long listing cannot
	 * overflow the call stack, and the anchor scan stops at the first match so
	 * a page containing several "下一页" anchors is not crawled more than once
	 * (the original kept scanning after recursing, which could re-fetch pages).
	 *
	 * @param leakListDoc parsed document of the page already processed
	 * @param page        1-based number of that page (used for logging only)
	 */
	public static void getPageCode(Document leakListDoc, int page) {
		Document currentDoc = leakListDoc;
		boolean foundNext = true;
		while (foundNext) {
			foundNext = false;
			Elements aElements = currentDoc.getElementsByTag("a");
			for (int i = 0; i < aElements.size(); i++) {
				Element anchor = aElements.get(i);
				if (anchor.toString().contains("下一页")) {
					// hrefs are site-relative, e.g. "/publish/main/9/index_2.html".
					String nextHref = mainUrl + anchor.attr("href");
					currentDoc = sendHttp(nextHref);
					page++;
					System.out.println("处理第"+page+"页数据");
					solvePageData(currentDoc);
					foundNext = true;
					break; // follow only the first "next page" link per page
				}
			}
		}
	}

	/**
	 * Extracts the detail-page links from one listing page and prints them.
	 *
	 * Bulletins live in {@code <ul class="waring_con">} (sic — the site's own
	 * class name) as {@code <li>} items. The original implementation printed
	 * {@code li.toString().substring(85, 159)}, which throws
	 * StringIndexOutOfBoundsException for any item shorter than 159 characters
	 * and silently breaks whenever the markup shifts; here the link is read
	 * from the item's onclick attribute, falling back to a nested
	 * {@code <a href>}.
	 *
	 * NOTE(review): assumes the detail URL sits in the li's onclick handler or
	 * a child anchor — confirm against the live markup.
	 *
	 * @param leakListDoc parsed listing page
	 */
	public static void solvePageData(Document leakListDoc) {
		Elements ulElements = leakListDoc.getElementsByClass("waring_con");
		for (int i = 0; i < ulElements.size(); i++) {
			Elements liElements = ulElements.get(i).getElementsByTag("li");
			for (int j = 0; j < liElements.size(); j++) {
				Element li = liElements.get(j);
				String detailLink = li.attr("onclick");
				if (detailLink.isEmpty()) {
					// Fallback: first non-empty href of a nested anchor.
					for (Element link : li.getElementsByTag("a")) {
						String href = link.attr("href");
						if (!href.isEmpty()) {
							detailLink = href;
							break;
						}
					}
				}
				System.out.println(detailLink);
			}
		}
	}

	/**
	 * Performs an HTTPS GET against {@code url} over the keystore-backed SSL
	 * channel and parses the response body with Jsoup.
	 *
	 * On any failure (non-200 status, network error, SSL error) the method
	 * logs the problem and returns a document parsed from the empty string
	 * rather than throwing, so the page loop can continue.
	 *
	 * @param url absolute URL to fetch
	 * @return parsed document; empty document when the request failed
	 */
	public static Document sendHttp(String url) {
		String html = "";
		CloseableHttpClient httpclient = null;
		CloseableHttpResponse response = null;
		try {
			SSLConnectionSocketFactory sslsf = createSSLConnSocketFactory();
			httpclient = HttpClients.custom()
				.setSSLSocketFactory(sslsf).build();
			HttpGet httpget = new HttpGet(url);
			// Browser-like UA: some sites reject the default Java agent.
			httpget.addHeader(HttpHeaders.USER_AGENT,
					"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0");
			// Connect and socket-read timeouts (ms) so a dead server cannot
			// hang the crawl indefinitely.
			RequestConfig requestConfig = RequestConfig.custom()
					.setSocketTimeout(10000).setConnectTimeout(10000).build();
			httpget.setConfig(requestConfig);
			System.out.println("Executing request " + httpget.getRequestLine());
			response = httpclient.execute(httpget);
			HttpEntity entity = response.getEntity();
			System.out.println("----------------------------------------");
			System.out.println(response.getStatusLine());
			int resStatu = response.getStatusLine().getStatusCode();
			if (resStatu == HttpStatus.SC_OK) { // only a 200 body is usable
				if (entity != null) {
					html = EntityUtils.toString(entity, "UTF-8");
					// Normalize HTML non-breaking spaces for later parsing.
					html = html.replace("&nbsp;", " ");
				}
			}
			// Ensure the entity stream is fully consumed so the connection
			// can be released cleanly.
			EntityUtils.consume(entity);
		} catch(Exception e){
			e.printStackTrace();
		}finally{
			if(response!=null){
				try {
					response.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if(httpclient!=null){
				try {
					httpclient.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return Jsoup.parse(html);
	}

	/**
	 * Builds an SSL socket factory trusting the certificate material in
	 * {@link #KEYSTORE_PATH} so the target site's HTTPS handshake succeeds.
	 *
	 * SECURITY NOTE: {@code TrustSelfSignedStrategy} accepts any self-signed
	 * certificate — acceptable for a one-off crawler, not for production use.
	 *
	 * @return TLS-only socket factory backed by the keystore
	 * @throws Exception if the keystore cannot be read or the context built
	 */
	private static SSLConnectionSocketFactory createSSLConnSocketFactory()
			throws Exception {
		SSLContext sslcontext = SSLContexts
				.custom()
				.loadTrustMaterial(
						new File(KEYSTORE_PATH),
						KEYSTORE_PASSWORD.toCharArray(),
						new TrustSelfSignedStrategy()) // keystore file and password must match
				.build();
		// Offer TLSv1.2 alongside TLSv1 — modern servers routinely reject
		// TLSv1-only handshakes (original listed only "TLSv1").
		return new SSLConnectionSocketFactory(
				sslcontext, new String[] { "TLSv1", "TLSv1.2" }, null,
				SSLConnectionSocketFactory.getDefaultHostnameVerifier());
	}

	public static void main(String[] args) {
		httpsReptile();
		//sendHttp("https://blog.csdn.net/qq_36706878");
	}

}

运行结果如下:获取到了详情页的各个链接,再对这些链接逐一请求即可获取详情数据。

  • 2
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
爬取CNVD漏洞信息,可以使用Java中的HttpClient和Jsoup库来实现。具体步骤如下: 1. 使用HttpClient发送请求,获取CNVD漏洞列表页面的HTML源码。 ```java CloseableHttpClient httpClient = HttpClients.createDefault(); HttpGet httpGet = new HttpGet("http://www.cnvd.org.cn/flaw/list.htm"); CloseableHttpResponse response = httpClient.execute(httpGet); String html = EntityUtils.toString(response.getEntity(), "UTF-8"); ``` 2. 使用Jsoup解析HTML源码,获取漏洞列表。 ```java Document doc = Jsoup.parse(html); Elements flawList = doc.select(".flaw_list tr:gt(0)"); for (Element flaw : flawList) { // 获取漏洞信息,如漏洞名称、CNVD-ID、公开日期、危害级别等 String name = flaw.select(".flaw_tit a").text(); String cnvdId = flaw.select(".flaw_list_c td:eq(1)").text(); String publicDate = flaw.select(".flaw_list_c td:eq(2)").text(); String severity = flaw.select(".flaw_list_c td:eq(3)").text(); // TODO: 处理漏洞信息 } ``` 3. 可以将获取到的漏洞信息存储到数据库或者文件中。 完整代码示例: ```java import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; public class CnvdCrawler { public static void main(String[] args) throws IOException { CloseableHttpClient httpClient = HttpClients.createDefault(); HttpGet httpGet = new HttpGet("http://www.cnvd.org.cn/flaw/list.htm"); CloseableHttpResponse response = httpClient.execute(httpGet); String html = EntityUtils.toString(response.getEntity(), "UTF-8"); Document doc = Jsoup.parse(html); Elements flawList = doc.select(".flaw_list tr:gt(0)"); for (Element flaw : flawList) { String name = flaw.select(".flaw_tit a").text(); String cnvdId = flaw.select(".flaw_list_c td:eq(1)").text(); String publicDate = flaw.select(".flaw_list_c td:eq(2)").text(); String severity = flaw.select(".flaw_list_c td:eq(3)").text(); System.out.println(name + " 
" + cnvdId + " " + publicDate + " " + severity); } } } ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值