webmagic爬数据写到mysql中

六个部分:四个类、一个依赖,还有一个数据库

# JDBC connection settings. JdbcUtil loads this file from the classpath as /driver.properties.
# Driver class name; JdbcUtil passes it to Class.forName.
driver=com.mysql.cj.jdbc.Driver
# JDBC URL; JdbcUtil passes it to DriverManager.getConnection.
url=jdbc:mysql://localhost:3306/webmagic?useUnicode=true&characterEncoding=utf-8&useSSL=false&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC
# Database credentials, passed to DriverManager.getConnection together with the URL.
username= root
password = 123456



-- Target table for the crawler: mysqlPipeline inserts one row of (title, source, type) per call.
-- id is an auto-increment primary key; the INSERT statement never supplies it.
create table t_news(
id int not null primary key auto_increment,
title varchar(225),
source varchar(50),
type varchar(225)
)charset utf8 collate utf8_general_ci;






import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Minimal JDBC helper that reads its connection settings from
 * {@code /driver.properties} on the classpath and executes single
 * update statements, opening and closing a connection per call.
 *
 * <p>Configuration problems (missing file, missing {@code driver} or
 * {@code url} key) fail class initialisation with a descriptive
 * {@link IllegalStateException} instead of an opaque
 * {@link NullPointerException}.
 */
public class JdbcUtil {

    private static final String driver;
    private static final String url;
    private static final String username;
    private static final String password;

    static {
        Properties prop = new Properties();

        // try-with-resources closes the stream, which the previous version leaked.
        try (InputStream is = JdbcUtil.class.getResourceAsStream("/driver.properties")) {
            if (is == null) {
                throw new IllegalStateException("/driver.properties was not found on the classpath");
            }
            prop.load(is);
        } catch (IOException e1) {
            throw new IllegalStateException("Could not read /driver.properties", e1);
        }

        driver = requireSetting(prop, "driver");
        url = requireSetting(prop, "url");
        // Credentials are passed through unchanged; they may legitimately be absent.
        username = prop.getProperty("username");
        password = prop.getProperty("password");

        try {
            Class.forName(driver);
        } catch (ClassNotFoundException e) {
            // Kept best-effort as before: report and continue. If no driver is
            // registered for the URL, getConnection reports it later as an SQLException.
            e.printStackTrace();
        }
    }

    /**
     * Returns the trimmed value of a mandatory setting.
     *
     * @param prop the loaded properties
     * @param key  the property name
     * @return the non-blank, trimmed value
     * @throws IllegalStateException if the key is missing or blank
     */
    private static String requireSetting(Properties prop, String key) {
        String value = prop.getProperty(key);
        if (value == null || value.trim().isEmpty()) {
            throw new IllegalStateException("Missing '" + key + "' in /driver.properties");
        }
        return value.trim();
    }

    /**
     * Executes one SQL update statement (INSERT/UPDATE/DELETE/DDL) on a
     * fresh connection. Any {@link SQLException} — including failure to
     * connect — is printed to standard error and not rethrown.
     *
     * @param sql the complete SQL statement to run
     */
    public static void executeSQL(String sql) {
        // Connection and statement are closed in reverse order automatically,
        // even if prepareStatement or executeUpdate throws.
        try (Connection conn = getConn();
             PreparedStatement ps = conn.prepareStatement(sql)) {
            ps.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection using the configured URL and credentials.
     *
     * @return an open connection, never {@code null}
     * @throws SQLException if the connection cannot be established
     */
    private static Connection getConn() throws SQLException {
        return DriverManager.getConnection(url, username, password);
    }
}






import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

/**
 * WebMagic pipeline that stores the {@code title}, {@code source} and
 * {@code type} fields of each result into the {@code t_news} table.
 *
 * <p>Results that carry none of the three fields are ignored, so pages
 * on which the processor extracted nothing no longer cause a
 * {@link NullPointerException}.
 */
public class mysqlPipeline implements Pipeline {

    /**
     * Builds and executes the INSERT statement for one result.
     *
     * @param resultItems extracted fields of the processed page
     * @param task        the running spider task (unused)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {

        String title = resultItems.get("title");    // company
        String source = resultItems.get("source");  // salary
        String type = resultItems.get("type");      // position

        // Nothing was extracted for this page: there is no row to insert.
        if (title == null && source == null && type == null) {
            return;
        }

        String sql = "INSERT INTO t_news "
                + "(title, source, type ) VALUES ( "
                + toSqlLiteral(title) + ", "
                + toSqlLiteral(source) + ", "
                + toSqlLiteral(type) + " );";
        System.out.println(sql);
        JdbcUtil.executeSQL(sql);
    }

    /**
     * Renders a value as a MySQL string literal, or {@code NULL} when absent.
     *
     * <p>Backslashes are escaped before quotes. Escaping only the quote lets
     * a value ending in a backslash turn the closing quote into an escaped
     * one and break out of the literal.
     *
     * @param value the raw field value, possibly {@code null}
     * @return the quoted and escaped literal, or the keyword {@code NULL}
     */
    private static String toSqlLiteral(String value) {
        if (value == null) {
            return "NULL";
        }
        String escaped = value.replace("\\", "\\\\").replace("'", "\\'");
        return "'" + escaped + "'";
    }
}














/**
 * @author 你的名称
 * @createTime 18-8-26
 * @description 招聘网站,爬三项。数量够了,到了文件夹里,一堆json文件,解决了一个网页只有一个结果的问题
 */

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.List;

/**
 * WebMagic page processor for job.oschina.net: follows the job links found
 * on each page and extracts three text fields ({@code type}, {@code source},
 * {@code title}) from pages whose URL contains {@code position}.
 */
public class first implements PageProcessor{

    private Site site = Site.me().setSleepTime(1000).setRetryTimes(3);

    // The spider runs with several threads (see main) that share this
    // instance, so the counter must be updated atomically.
    private final java.util.concurrent.atomic.AtomicInteger count =
            new java.util.concurrent.atomic.AtomicInteger();

    /**
     * Extracts the fields from a detail page and queues the job links
     * found on the current page.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {

        if(page.getUrl().regex(".*position.*").match()){
            page.putField("type",page.getHtml().xpath("//h1[@class='font-green']/text()").toString());
            page.putField("source",page.getHtml().xpath("//div[@class='col-xs-9']/div/b/text()").toString());
            page.putField("title",page.getHtml().xpath("//div[@class='col-xs-9']/div/a[1]/text()").toString());
            // Count each detail page exactly once; the previous code
            // incremented twice per page and so printed every other number.
            System.out.println(count.incrementAndGet());
        } else {
            // No fields were extracted here; mark the page as skipped so the
            // pipelines are not invoked with an empty result.
            page.setSkip(true);
        }

        // Only the job links are followed; pagination links are not queued.
        List<String> urlss = page.getHtml().xpath("//a[@class='h4 name over-hide']/@href").all();
        page.addTargetRequests(urlss);
    }

    /**
     * @return the crawl settings: 1000 ms sleep time and 3 retries
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Starts the crawl from the job search page with four threads and
     * sends results to the console, MySQL and file pipelines.
     *
     * @param args unused
     */
    public static void main(String args[]){
        Spider.create(new first())
                .addUrl("https://job.oschina.net/search?type=%E8%81%8C%E4%BD%8D%E6%90%9C%E7%B4%A2&key=&exp=0&edu=0&nat=1&city=%E5%85%A8%E5%9B%BD&p=")
                .addPipeline(new ConsolePipeline())
                .addPipeline(new mysqlPipeline())
                .addPipeline(new FilePipeline("D:\\webmagic\\"))
                .thread(4)
                .run();
    }

}





                    

借鉴https://my.oschina.net/anxiaole/blog/783989

e33cf3d4de7cef7e2dd078557c21f54ec19.jpg

转载于:https://my.oschina.net/u/3954304/blog/1935018

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值