分享一个爬虫项目:使用 Quartz 定时调度、PhantomJS 登录并抓取页面、Jsoup 解析后写入 MySQL

10 篇文章 0 订阅
5 篇文章 0 订阅
package com.baosight.utils;

import org.quartz.JobBuilder;
import org.quartz.JobDetail;
import org.quartz.Scheduler;
import org.quartz.SchedulerFactory;
import org.quartz.SimpleScheduleBuilder;
import org.quartz.Trigger;
import org.quartz.TriggerBuilder;
import org.quartz.impl.StdSchedulerFactory;

/**
 * Helper for registering a repeating Quartz job.
 */
public class QuartzUtil {

    /**
     * Builds a job and a simple repeating trigger, registers both with a scheduler
     * obtained from a new {@link StdSchedulerFactory}, and starts that scheduler.
     *
     * <p>Failures are not propagated to the caller; they are reported on the console
     * together with the full stack trace so the cause is not lost.
     *
     * @param jobName          name used for the job identity
     * @param jobGroupName     group used for the job identity
     * @param triggerName      name used for the trigger identity
     * @param triggerGroupName group used for the trigger identity
     * @param jobClass         job implementation class handed to {@code JobBuilder.newJob}
     * @param time             repeat interval in seconds, passed to
     *                         {@code SimpleScheduleBuilder.repeatSecondlyForever}
     */
    public static void handleSimpleTrigger(String jobName, String jobGroupName,
                                           String triggerName, String triggerGroupName, Class jobClass,
                                           int time) {
        SchedulerFactory schedulerfactory = new StdSchedulerFactory();
        try {
            // Obtain a scheduler from the factory.
            Scheduler scheduler = schedulerfactory.getScheduler();

            // Bind the job implementation class to a job identity (name + group).
            JobDetail job = JobBuilder.newJob(jobClass)
                    .withIdentity(jobName, jobGroupName)
                    .build();

            // Simple trigger: fire immediately, then repeat forever every `time` seconds.
            Trigger trigger = TriggerBuilder.newTrigger()
                    .withIdentity(triggerName, triggerGroupName)
                    .withSchedule(SimpleScheduleBuilder.repeatSecondlyForever(time))
                    .startNow()
                    .build();

            // Register the job with its trigger, then start the scheduler.
            scheduler.scheduleJob(job, trigger);
            scheduler.start();
        } catch (Exception e) {
            System.out.println("错了拉");
            // The fixed message above says nothing about the cause; keep the details.
            e.printStackTrace();
        }
    }

}

package com.baosight.utils;

import com.baosight.po.DriverPo;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.Dimension;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriverService;
import org.openqa.selenium.remote.DesiredCapabilities;

import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Starts a PhantomJS browser session, logs in to the target page and captures its HTML.
 */
public class PhontomJsUtil {

    /**
     * Opens {@code loginUrl} in a new PhantomJS session, fills in the login form,
     * refreshes the page and captures the resulting page source.
     *
     * <p>On success the returned object carries both the still-open driver and the
     * HTML. On any failure the driver is shut down, so no PhantomJS process is left
     * behind, and the returned object has neither a driver nor HTML set.
     *
     * @param loginUrl address of the login page
     * @return holder with the live driver and page source, or an empty holder on failure
     */
    public static DriverPo getHtml(String loginUrl) {
        DriverPo dp = new DriverPo();
        WebDriver driver = null;
        try {
            driver = getPhantomJSDriver();
            driver.manage().window().setSize(new Dimension(1920, 1080));
            driver.get(loginUrl);
            Thread.sleep(1000);
            // NOTE(review): credentials are hard-coded in source; they should come from
            // external configuration instead.
            driver.findElement(By.xpath("//*[@id=\"hp-login-user\"]")).sendKeys("3paradm");
            Thread.sleep(1000);
            System.out.println("用户");
            driver.findElement(By.xpath("//*[@id=\"hp-login-password\"]")).sendKeys("3pardata");
            Thread.sleep(1000);
            System.out.println("登密码");
            driver.findElement(By.xpath("//*[@id=\"hp-login-button\"]")).click();
            System.out.println("登陆成功");
            Thread.sleep(5000);
            // Refresh to get rid of the pop-up shown after login.
            driver.navigate().refresh();
            Thread.sleep(7000);
            // Publish the driver only once the page source has been captured, so a
            // caller never receives a driver that is about to be shut down.
            String htmlStr = driver.getPageSource();
            dp.setDriver(driver);
            dp.setHtmlStr(htmlStr);
        } catch (InterruptedException e) {
            // Restore the interrupt flag for code further up the stack.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            quitQuietly(driver);
        } catch (Exception e) {
            e.printStackTrace();
            quitQuietly(driver);
        }
        return dp;
    }

    /**
     * Creates a PhantomJS driver using the executable extracted by
     * {@link #loadPhantomJS()}, with web security, SSL error checking and XSS
     * auditing switched off.
     *
     * @return a new PhantomJS-backed driver
     * @throws Exception if the driver cannot be created
     */
    public static WebDriver getPhantomJSDriver() throws Exception {
        DesiredCapabilities dcaps = new DesiredCapabilities();

        dcaps.setCapability(PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY, loadPhantomJS());
        dcaps.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS,
                new String[] { "--web-security=false", "--ignore-ssl-errors=true" });

        dcaps.setCapability(PhantomJSDriverService.PHANTOMJS_PAGE_SETTINGS_PREFIX + "webSecurityEnabled", false);
        dcaps.setCapability(PhantomJSDriverService.PHANTOMJS_PAGE_SETTINGS_PREFIX + "XSSAuditingEnabled", false);

        return new PhantomJSDriver(dcaps);
    }

    /**
     * Copies the bundled {@code phantomjs.exe} from the classpath to
     * {@code D:/sunbeamApps/} and returns the absolute path of the copy.
     *
     * @return absolute path of the extracted executable, or an empty string when the
     *         resource is missing or the copy fails
     */
    private static String loadPhantomJS() {
        String phantomJs = "phantomjs.exe";
        String resource = "phantomjs-2.1.1-windows/bin/" + phantomJs;
        File fileOut = new File("D:/sunbeamApps/" + phantomJs);
        // try-with-resources closes both streams even when the copy fails.
        try (InputStream in = PhontomJsUtil.class.getClassLoader().getResourceAsStream(resource)) {
            if (in == null) {
                System.err.println("PhantomJS resource not found on classpath: " + resource);
                return "";
            }
            try (OutputStream out = FileUtils.openOutputStream(fileOut)) {
                IOUtils.copy(in, out);
            }
            return fileOut.getAbsolutePath();
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
    }

    /** Shuts the driver down if one was created; problems during shutdown are only reported. */
    private static void quitQuietly(WebDriver driver) {
        if (driver == null) {
            return;
        }
        try {
            driver.quit();
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }

}
package com.baosight.utils;

import java.io.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;


public class XmlUtil {

    /**
     * 通过文件的路径获取xml的document对象
     *
     * @param path    文件的路径
     * @return		返回文档对象
     */
    public static Document getXMLByFilePath(InputStream path) {
        if (null == path) {
            return null;
        }
        Document document = null;
        try {
            SAXReader reader = new SAXReader();
            document = reader.read(path);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return document;
    }

    /**
     * 获取保存信息
     *
     * @param path
     */

    public static Map<String, String> getRequest(InputStream path){
        Map<String, String> mapRequest = new HashMap<>();

        Document document = XmlUtil.getXMLByFilePath(path);
        Element rootElm = document.getRootElement();
        for(Iterator iterator = rootElm.element("T_RJJH_DIA_13_42_LOG-netDevice-10.240.13.421").elementIterator();iterator.hasNext();){
            Element element = (Element) iterator.next();
            Iterator provinces = element.elementIterator();
            while (provinces.hasNext()) {
                Element enEle = (Element) provinces.next();
                mapRequest.put(enEle.getName(), enEle.getText());
            }
        }

        return mapRequest;
    }


    /**
     * 获取页面信息
     *
     * @param path
     */

    public static Map<String, String> getPage(InputStream path){
        Map<String, String> page = new HashMap<>();

        Document document = XmlUtil.getXMLByFilePath(path);
        Element rootElm = document.getRootElement();
        for (Iterator iterator = rootElm.element("T_RJJH_DIA_13_42_LOG-netDevice-10.240.13.421")
                .elementIterator(); iterator.hasNext();) {
            Element element = (Element) iterator.next();
            Iterator provinces = element.elementIterator();
            while (provinces.hasNext()) {
                Element enEle = (Element) provinces.next();
                if (enEle.getName().equals("request")) {
                    Iterator lastIter = enEle.elementIterator();
                    while (lastIter.hasNext()) {
                        Element lastEle = (Element) lastIter.next();
                        page.put(lastEle.getName(), lastEle.getText());
                    }
                }
            }
        }

        return page;
    }



    /**
     * 获取element的单个属性
     * @param node		需要获取属性的节点对象
     * @param attr		需要获取的属性值
     * @return			返回属性的值
     */
    public static String getAttribute(Element node,String attr){
        if(null == node||attr==null||"".equals(attr)){
            return "";
        }
        return node.attributeValue(attr);
    }

    /**
     * 去掉声明头的(即<?xml...?>去掉)
     *
     * @param document
     * @param charset
     * @return
     */
    public static String documentToStringNoDeclaredHeader(Document document, String charset) {
        String xml = documentToStringNoDeclaredHeader(document, charset);
        return xml.replaceFirst("\\s*<[^<>]+>\\s*", "");
    }

}

package com.baosight.utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Map;

/**
 * Small JDBC helpers for opening a connection and releasing JDBC resources.
 */
public class JDBCUtil {

    /**
     * Loads the driver class named by the {@code mysqlDriver} entry and returns a
     * connection.
     *
     * @param conn an existing connection to reuse, or {@code null} to open a new one
     * @param map  settings; reads {@code mysqlDriver}, {@code mysqlUrl},
     *             {@code mysqlAdmin} and {@code mysqlPassword}
     * @return {@code conn} itself when it is not {@code null}, otherwise a new connection
     * @throws ClassNotFoundException if the driver class cannot be loaded
     * @throws RuntimeException       wrapping the {@link SQLException} when opening fails
     */
    public static Connection getConnection(Connection conn, Map<String, String> map) throws ClassNotFoundException {
        // The driver class is loaded on every call, even when a connection is supplied.
        Class.forName(map.get("mysqlDriver"));
        if (conn != null) {
            return conn;
        }
        try {
            return DriverManager.getConnection(
                    map.get("mysqlUrl"), map.get("mysqlAdmin"), map.get("mysqlPassword"));
        } catch (SQLException openFailure) {
            throw new RuntimeException(openFailure);
        }
    }

    /**
     * Closes the result set, the statement and the connection, in that order.
     * {@code null} arguments are skipped; an {@link SQLException} from one resource is
     * printed and does not stop the remaining resources from being closed.
     *
     * @param conn connection to close, may be {@code null}
     * @param prep statement to close, may be {@code null}
     * @param rs   result set to close, may be {@code null}
     */
    public static void close(Connection conn, PreparedStatement prep, ResultSet rs) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException closeFailure) {
            closeFailure.printStackTrace();
        }

        try {
            if (prep != null) {
                prep.close();
            }
        } catch (SQLException closeFailure) {
            closeFailure.printStackTrace();
        }

        try {
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException closeFailure) {
            closeFailure.printStackTrace();
        }
    }

}
package com.baosight.utils;

import java.security.MessageDigest;

/**
 * Computes MD5 digests rendered as lower-case hexadecimal strings.
 */
public class MD5Util {

    /** Lower-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Renders each byte as two lower-case hexadecimal characters. */
    private static String byteArrayToHexString(byte[] b) {
        StringBuilder hex = new StringBuilder(b.length * 2);
        for (byte value : b) {
            int unsigned = value & 0xFF;
            hex.append(HEX_DIGITS[unsigned >>> 4]).append(HEX_DIGITS[unsigned & 0x0F]);
        }
        return hex.toString();
    }

    /**
     * Returns the MD5 digest of {@code origin} as a 32-character lower-case hex string.
     *
     * <p>Earlier versions swallowed every failure and returned {@code origin} itself,
     * so an unsupported charset made the plaintext look like its own hash. Failures
     * are now reported with an exception instead.
     *
     * @param origin      text to hash; {@code null} yields {@code null}
     * @param charsetname charset used to encode {@code origin} into bytes; when
     *                    {@code null} or empty the platform default charset is used
     * @return the hex-encoded digest, or {@code null} when {@code origin} is {@code null}
     * @throws IllegalArgumentException if {@code charsetname} is not a supported charset
     * @throws IllegalStateException    if the runtime provides no MD5 implementation
     */
    public static String MD5Encode(String origin, String charsetname) {
        if (origin == null) {
            return null;
        }
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] input = (charsetname == null || charsetname.isEmpty())
                    ? origin.getBytes()
                    : origin.getBytes(charsetname);
            return byteArrayToHexString(md.digest(input));
        } catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalArgumentException("Unsupported charset: " + charsetname, e);
        } catch (java.security.NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }
    }

}

package com.baosight.run;

import java.io.InputStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;

import com.baosight.po.DriverPo;
import com.baosight.utils.*;
import org.jsoup.Jsoup;
import org.openqa.selenium.WebDriver;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;

/**
 * Application entry point and Quartz job.
 *
 * <p>{@link #main(String[])} stores the configuration values and an initial scrape of
 * the monitored page in table {@code cvs}, then schedules this class as a repeating
 * job. Each {@link #execute(JobExecutionContext)} run refreshes the page in the
 * browser session opened by {@code main} and updates the scraped rows.
 */
public class App implements Job {
    /** Browser session and page source obtained by {@code main}; reused by every job run. */
    static DriverPo dp = null;
    /** No longer used by this class; kept so that existing references still compile. */
    static PreparedStatement pstmt = null;
    static InputStream is = App.class.getClassLoader().getResourceAsStream("appConfig.xml");
    /** Settings read from {@code appConfig.xml} by {@code XmlUtil.getRequest}. */
    static Map<String, String> map = XmlUtil.getRequest(is);
    static InputStream in = App.class.getClassLoader().getResourceAsStream("appConfig.xml");
    /** Page settings read by {@code XmlUtil.getPage}; the values are used as element selectors. */
    static Map<String, String> mapPage = XmlUtil.getPage(in);

    private static final String INSERT_SQL = "INSERT into cvs (attrKey,attrValue) VALUES (?,?)";
    private static final String UPDATE_SQL = "update cvs set attrValue=? where attrKey=?";

    /**
     * Inserts one row per configuration entry and then schedules the refresh job.
     *
     * <ul>
     *   <li>{@code mysqlAdmin} and {@code mysqlPassword} are stored as MD5 hex digests;</li>
     *   <li>{@code url} triggers a login and scrape of that address, and the scraped
     *       attributes are stored instead of the URL;</li>
     *   <li>every other entry is stored unchanged.</li>
     * </ul>
     *
     * Any failure is printed and stops processing; the job is then not scheduled and
     * a browser session that was already opened is shut down.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Connection conn = null;
        boolean scheduled = false;
        try {
            conn = JDBCUtil.getConnection(conn, map);
            for (Map.Entry<String, String> entry : map.entrySet()) {
                String mapKey = entry.getKey();
                String mapValue = entry.getValue();
                if (mapKey.equals("mysqlAdmin") || mapKey.equals("mysqlPassword")) {
                    insertAttribute(conn, mapKey, MD5Util.MD5Encode(mapValue, "utf-8"));
                } else if (mapKey.equals("url")) {
                    dp = PhontomJsUtil.getHtml(mapValue);
                    insertScrapedAttributes(conn, scrapeAttributes(parsePage(dp.getHtmlStr())));
                } else {
                    insertAttribute(conn, mapKey, mapValue);
                }
            }
            QuartzUtil.handleSimpleTrigger("44033", "3333", "44033", "3333", App.class, 240);
            scheduled = true;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            JDBCUtil.close(conn, null, null);
            if (!scheduled) {
                // Without a scheduled job nobody will use the browser session again.
                quitDriverQuietly();
            }
        }
    }

    /**
     * Refreshes the monitored page in the existing browser session, scrapes it again
     * and updates the five scraped rows in a single transaction.
     *
     * <p>The previous implementation created a new statement for every update and ran
     * {@code executeBatch()} only on the last one, so only the utilisation row was
     * ever written; all updates now go through one parameterised batch.
     *
     * @param context Quartz execution context (unused)
     * @throws JobExecutionException if no browser session is available
     */
    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        if (dp == null || dp.getDriver() == null) {
            throw new JobExecutionException("No browser session available; the initial login did not succeed");
        }
        Connection conn = null;
        try {
            conn = JDBCUtil.getConnection(conn, map);
            conn.setAutoCommit(false);
            WebDriver driverJob = dp.getDriver();
            driverJob.navigate().refresh();
            Thread.sleep(6000);
            String[][] rows = scrapeAttributes(parsePage(driverJob.getPageSource()));
            // rows[1] is the "diskArraryUsed" entry.
            System.out.println(rows[1][1]);
            try (PreparedStatement update = conn.prepareStatement(UPDATE_SQL)) {
                for (String[] row : rows) {
                    update.setString(1, row[1]);
                    update.setString(2, row[0]);
                    update.addBatch();
                }
                update.executeBatch();
            }
            conn.commit();
            System.out.println("修改成功");
            conn.setAutoCommit(true);
        } catch (InterruptedException e) {
            // Restore the interrupt flag for the scheduler thread.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (SQLException | ClassNotFoundException e) {
            rollbackQuietly(conn);
            e.printStackTrace();
        } finally {
            JDBCUtil.close(conn, null, null);
            System.out.println("数据库连接关闭");
        }
    }

    /** Parses captured HTML, failing with a clear message when nothing was captured. */
    private static org.jsoup.nodes.Document parsePage(String html) {
        if (html == null) {
            throw new IllegalStateException("No page source was captured; login or page load failed");
        }
        return Jsoup.parse(html);
    }

    /**
     * Extracts the monitored attributes from the page as {@code {key, value}} pairs,
     * in the order: equipmentState, diskArraryUsed, diskArraryFree, diskArraryTotal,
     * diskArrayUtilization (used / total as a whole-number percentage).
     */
    private static String[][] scrapeAttributes(org.jsoup.nodes.Document page) {
        String state = selectText(page, "equipmentState");
        String used = selectText(page, "diskArraryUsed");
        String free = selectText(page, "diskArraryFree");
        String total = selectText(page, "diskArraryTotal");
        int utilization = (int) (leadingNumber(used) / leadingNumber(total) * 100);
        return new String[][] {
                { "equipmentState", state },
                { "diskArraryUsed", used },
                { "diskArraryFree", free },
                { "diskArraryTotal", total },
                { "diskArrayUtilization", utilization + "%" },
        };
    }

    /** Returns the text of the first element matching the selector configured under {@code key}. */
    private static String selectText(org.jsoup.nodes.Document page, String key) {
        String selector = mapPage.get(key);
        if (selector == null || selector.isEmpty()) {
            throw new IllegalStateException("No selector configured for '" + key + "'");
        }
        org.jsoup.nodes.Element element = page.selectFirst(selector);
        if (element == null) {
            throw new IllegalStateException("Selector for '" + key + "' matched nothing: " + selector);
        }
        return element.text();
    }

    /**
     * Parses the number at the start of {@code text}: everything up to the first space
     * (or the whole text when there is none), with grouping commas removed.
     */
    private static double leadingNumber(String text) {
        String trimmed = text.trim();
        int space = trimmed.indexOf(' ');
        String number = space < 0 ? trimmed : trimmed.substring(0, space);
        return Double.parseDouble(number.replace(",", ""));
    }

    /** Inserts a single key/value row and reports the outcome on the console. */
    private static void insertAttribute(Connection conn, String key, String value) throws SQLException {
        try (PreparedStatement insert = conn.prepareStatement(INSERT_SQL)) {
            insert.setString(1, key);
            insert.setString(2, value);
            reportInsert(insert.executeUpdate());
        }
    }

    /**
     * Inserts all scraped rows with one multi-row, fully parameterised INSERT, so page
     * text is never concatenated into the SQL statement.
     */
    private static void insertScrapedAttributes(Connection conn, String[][] rows) throws SQLException {
        StringBuilder sql = new StringBuilder("INSERT into cvs (attrKey,attrValue) VALUES ");
        for (int i = 0; i < rows.length; i++) {
            sql.append(i == 0 ? "(?,?)" : ",(?,?)");
        }
        try (PreparedStatement insert = conn.prepareStatement(sql.toString())) {
            int parameter = 1;
            for (String[] row : rows) {
                insert.setString(parameter++, row[0]);
                insert.setString(parameter++, row[1]);
            }
            reportInsert(insert.executeUpdate());
        }
    }

    /** Prints the success or failure message for an insert that affected {@code rows} rows. */
    private static void reportInsert(int rows) {
        if (rows > 0) {
            System.out.println("数据插入成功!");
        } else {
            System.out.println("数据插入失败!");
        }
    }

    /** Rolls back the open transaction, if any; a failure to roll back is only reported. */
    private static void rollbackQuietly(Connection conn) {
        if (conn == null) {
            return;
        }
        try {
            conn.rollback();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Shuts down the browser session held in {@link #dp}, if there is one. */
    private static void quitDriverQuietly() {
        if (dp == null || dp.getDriver() == null) {
            return;
        }
        try {
            dp.getDriver().quit();
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }
}

package com.baosight.po;

import org.openqa.selenium.WebDriver;

/**
 * Simple holder pairing a browser driver with the HTML captured from it.
 */
public class DriverPo {

    /** Browser session the HTML was captured from. */
    WebDriver driver;

    /** Page source captured from the driver. */
    String htmlStr;

    /** @return the stored page source */
    public String getHtmlStr() {
        return this.htmlStr;
    }

    /** @param htmlStr page source to store */
    public void setHtmlStr(String htmlStr) {
        this.htmlStr = htmlStr;
    }

    /** @return the stored driver */
    public WebDriver getDriver() {
        return this.driver;
    }

    /** @param driver driver to store */
    public void setDriver(WebDriver driver) {
        this.driver = driver;
    }

}

maven依赖

<dependencies>
		<dependency>
			<groupId>net.sf.cssbox</groupId>
			<artifactId>cssbox</artifactId>
			<version>4.14</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/org.quartz-scheduler/quartz -->
		<dependency>
			<groupId>org.quartz-scheduler</groupId>
			<artifactId>quartz</artifactId>
			<version>2.2.3</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>3.8.1</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>us.codecraft</groupId>
			<artifactId>webmagic-selenium</artifactId>
			<version>0.5.2</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
		<dependency>
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.11.3</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/dom4j/dom4j -->
		<dependency>
			<groupId>dom4j</groupId>
			<artifactId>dom4j</artifactId>
			<version>1.6.1</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.5.6</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/log4j/log4j -->
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>1.2.12</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>
		<dependency>
			<groupId>com.codeborne</groupId>
			<artifactId>phantomjsdriver</artifactId>
			<version>1.2.1</version>
		</dependency>

	</dependencies>
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值