WebMagic 学习第三天

1. 将从网上爬取到的数据保存到数据库中

import com.mysql.jdbc.StringUtils;
import org.Movie;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.security.PublicKey;
import java.util.ArrayList;
import java.util.List;

/**
 * WebMagic {@link PageProcessor} for the "performer" page of a film on 1905.com.
 *
 * <p>The page is expected to contain one {@code div.secPage-actors} section per role
 * (director, screenwriter, actor). For every section the names are taken from the
 * {@code alt="..."} attributes found in the section's HTML and joined with {@code ';'}.
 * The result is published as a {@code Movie} under the result field {@code "Movie"}.
 */
public class MoviePerformerProcess implements PageProcessor {
    /** Film id remembered by the last {@link #getURL(String)} call; copied into every scraped Movie. */
    public  String  id;

    /**
     * Builds the performer-page URL for a film and remembers the id on this processor.
     *
     * @param id film id as used in the 1905.com URL path
     * @return the URL of the film's performer page
     */
    public  String getURL(String id) {
        // Side effect on purpose: process() later needs the id to fill Movie.movie_id.
        this.id = id;
        return "https://www.1905.com/mdb/film/" + id + "/performer/?fr=mdbypsy_dh_yzry";
    }

    private Site site = Site.me().setRetryTimes(3).setSleepTime(1000);

    /**
     * Extracts directors, screenwriters and actors from the page and stores them,
     * together with the remembered film id, in the {@code "Movie"} result field.
     *
     * @param page the downloaded performer page
     */
    @Override
    public void process(Page page) {
        // One node per role section on the page.
        List<Selectable> sections = page.getHtml().css("div.secPage-actors").nodes();

        StringBuilder directors = new StringBuilder();
        StringBuilder writers = new StringBuilder();
        StringBuilder actors = new StringBuilder();

        for (Selectable section : sections) {
            // The section heading tells which role the names below belong to.
            String heading = section.xpath("//div/h3/text()").toString();
            if ("导演".equals(heading)) {          // director
                directors.append(joinAltNames(section.toString()));
            } else if ("编剧".equals(heading)) {   // screenwriter
                writers.append(joinAltNames(section.toString()));
            } else if ("演员".equals(heading)) {   // actor
                actors.append(joinAltNames(section.toString()));
            }
        }

        Movie movie = new Movie();
        movie.setMovie_dir(directors.toString());
        movie.setMovie_actor(actors.toString());
        movie.setMovie_wirter(writers.toString());
        movie.setMovie_id(id);
        page.putField("Movie", movie);
    }

    /**
     * Collects every {@code alt="..."} attribute value in an HTML fragment.
     *
     * <p>Each value is cut at its closing double quote, so the extraction does not depend
     * on the exact markup that follows the attribute. If a fragment has no closing quote
     * the remaining text is used unchanged.
     *
     * @param html HTML of one role section; may be {@code null}
     * @return the values in document order, each followed by {@code ';'}; empty if none
     */
    private static String joinAltNames(String html) {
        if (html == null) {
            return "";
        }
        StringBuilder names = new StringBuilder();
        String[] fragments = html.split("alt=\"");
        // fragments[0] is the text before the first alt attribute, so start at 1.
        for (int k = 1; k < fragments.length; k++) {
            String fragment = fragments[k];
            int closingQuote = fragment.indexOf('"');
            names.append(closingQuote >= 0 ? fragment.substring(0, closingQuote) : fragment)
                 .append(';');
        }
        return names.toString();
    }

    /** @return the crawl settings built in the {@code site} field (retry times 3, sleep time 1000) */
    @Override
    public Site getSite() {
        return site;
    }
}

import C.DataSourceFactory;
import org.Movie;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

public class MoviePerformerPipeline  implements Pipeline {
    public void process(ResultItems resultItems, Task task) {
        Movie movie=resultItems.get("Movie");
        DataSource ds= DataSourceFactory.getDataSource();
        Connection conn=null;
        try {
            System.out.println("开始保存");
            conn= ds.getConnection();
            PreparedStatement pst=conn.prepareStatement("update scott.movie set director=?,writer=?,actor=? where id=?;");
            pst.setString(1,movie.getMovie_dir());
            pst.setString(2,movie.getMovie_wirter());
            pst.setString(3,movie.getMovie_actor());
            pst.setString(4,movie.getMovie_id());
            pst.executeUpdate();
        } catch (SQLException e) {
            System.out.println("保存失败");
            e.printStackTrace();
        }
        if(conn!=null){
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

import C.DataSourceFactory;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Spider;

import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

/**
 * Entry point: reads every film id from the {@code movie} table and crawls the
 * performer page of each film, one spider run per id.
 */
public class MoviePerformerSpider {
    /**
     * Crawls the performer page of a single film and stores the result via
     * {@link MoviePerformerPipeline}. Blocks until the spider has finished.
     *
     * @param id film id used to build the page URL
     */
    public static  void start(String id){
        // A fresh processor per film: getURL() stores the id on the processor instance.
        MoviePerformerProcess processor = new MoviePerformerProcess();
        Spider.create(processor)
                .addUrl(processor.getURL(id))
                .addPipeline(new MoviePerformerPipeline())
                .run();
    }

    /**
     * Loads all film ids, then crawls them one after another.
     *
     * @param args unused
     * @throws SQLException if the ids cannot be read from the database
     */
    public static void main(String[] args) throws SQLException {
        // The ids are read completely before crawling starts, so no database connection
        // is held open during the crawl and a read error cannot leave the loop spinning.
        for (String id : loadMovieIds()) {
            start(id);
        }
    }

    /**
     * Reads the {@code id} column of every row in {@code movie}.
     *
     * @return the ids in the order returned by the query; empty if the table has no rows
     * @throws SQLException if connecting or querying fails
     */
    private static List<String> loadMovieIds() throws SQLException {
        DataSource ds = DataSourceFactory.getDataSource();
        List<String> ids = new ArrayList<>();
        // NOTE(review): the pipeline updates "scott.movie" while this reads "movie" —
        // presumably the same table via the default schema; confirm.
        try (Connection conn = ds.getConnection();
             PreparedStatement pst = conn.prepareStatement("select id from movie");
             ResultSet rows = pst.executeQuery()) {
            while (rows.next()) {
                ids.add(rows.getString("id"));
            }
        }
        return ids;
    }
}

2.最后的结果

在这里插入图片描述

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值