Java爬取页面数据导入数据库

在这里插入图片描述

建立实体类

在这里插入图片描述

连接数据库

package util;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

/**
 * Minimal JDBC helper: loads the MySQL Connector/J driver class once and
 * opens connections to the configured database on request.
 */
public class JdbcUtil {

    // Connection settings. The user name and password literals are template
    // values and have to be replaced with real credentials before use.
    private static final String URL = "jdbc:mysql://localhost:3306/exam?serverTimezone=UTC";
    private static final String USERNAME = "数据库名";
    private static final String PASSWORD = "数据库密码";

    static {
        // Load the driver class when this helper is first initialized; a
        // missing driver is only reported, not propagated.
        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
        } catch (ClassNotFoundException driverMissing) {
            driverMissing.printStackTrace();
        }
    }

    /**
     * Opens a new connection with the configured URL, user name and password.
     *
     * @return the opened connection, or {@code null} if {@link DriverManager}
     *         threw an {@link SQLException} (its stack trace is printed)
     */
    public static Connection getConnection() {
        Connection connection = null;
        try {
            connection = DriverManager.getConnection(URL, USERNAME, PASSWORD);
        } catch (SQLException connectFailure) {
            connectFailure.printStackTrace();
        }
        return connection;
    }
}

导入数据库的语句

package dao;

import pojo.Univ;
import util.JdbcUtil;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Data-access object that stores scraped {@code Univ} records in the
 * {@code tb_sunshine} table.
 */
public class DataDao {

    /**
     * Parameterized insert statement. Values are bound through placeholders
     * rather than concatenated into the SQL text, so scraped strings that
     * contain quotes cannot break or alter the statement.
     */
    private static final String INSERT_SQL =
            "insert into tb_sunshine(name,city,manage,school_type,batch,df_universities,df_disciplines,graduate_school,satisfaction) "
                    + "values(?,?,?,?,?,?,?,?,?)";

    /**
     * Inserts one record into {@code tb_sunshine}.
     *
     * @param m the record to store; its nine getters supply the column values
     * @return the row count reported by {@code executeUpdate}, or {@code 0}
     *         when no connection is available or the insert fails
     */
    public int insert(Univ m) {
        // Print the statement template so a failing insert can be reproduced
        // by hand in a database console.
        System.out.println(INSERT_SQL);

        int affectedRows = 0;
        // try-with-resources closes the statement and the connection on every
        // path; a null connection resource is skipped at close time.
        try (Connection conn = JdbcUtil.getConnection()) {
            if (conn == null) {
                // JdbcUtil.getConnection() signals failure by returning null.
                System.err.println("数据存储异常:无法获取数据库连接");
                return 0;
            }
            try (PreparedStatement ps = conn.prepareStatement(INSERT_SQL)) {
                ps.setString(1, m.getName());
                ps.setString(2, m.getCity());
                ps.setString(3, m.getManage());
                ps.setString(4, m.getSchool_type());
                ps.setString(5, m.getBatch());
                ps.setString(6, m.getDf_universities());
                ps.setString(7, m.getDf_disciplines());
                ps.setString(8, m.getGraduate_school());
                ps.setString(9, m.getSatisfaction());
                affectedRows = ps.executeUpdate();
            }
        } catch (SQLException e) {
            System.err.println("数据存储异常:" + e.getMessage());
        }
        return affectedRows;
    }
}

爬取数据

package servlet;


import dao.DataDao;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import pojo.Univ;

import java.io.IOException;
import java.util.ArrayList;

/**
 * Command-line scraper: walks a paginated listing, reads the {@code td}
 * cells of the {@code yxk-table} element on each page, and stores every
 * nine-cell row as a {@code Univ} record through {@code DataDao}.
 */
public class WramData {

    /** Listing URL prefix; the page offset is appended to it. Placeholder value, replace with the real URL. */
    private static final String BASE_URL = "url";
    /** Largest page offset requested (inclusive). Adjust to the target site. */
    private static final int LAST_PAGE_OFFSET = 2800;
    /** Amount the page offset grows by between requests. */
    private static final int PAGE_STEP = 20;
    /** Maximum number of table rows read from one page. */
    private static final int ROWS_PER_PAGE = 20;
    /** Number of {@code td} cells that make up one table row. */
    private static final int CELLS_PER_ROW = 9;

    /**
     * Fetches and parses the page at the given URL.
     *
     * @param url address of the page to download
     * @return the parsed document, or {@code null} when the request fails
     *         with an {@link IOException} (its stack trace is printed)
     */
    public Document getDocument(String url) {
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Crawls every page offset from 0 to {@code LAST_PAGE_OFFSET} and inserts
     * the rows found on each page. A page that cannot be fetched is skipped,
     * and only complete rows that are actually present are read, so a short
     * or empty page no longer aborts the crawl.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WramData scraper = new WramData();
        // One DAO is enough; it opens a fresh connection per insert.
        DataDao dataDao = new DataDao();

        for (int page = 0; page <= LAST_PAGE_OFFSET; page += PAGE_STEP) {
            Document doc = scraper.getDocument(BASE_URL + page);
            if (doc == null) {
                System.err.println("Skipping page offset " + page + ": document could not be fetched");
                continue;
            }

            // The class name "yxk-table" and the td layout are specific to
            // the target page; adjust both selectors for other sites.
            Elements cells = doc.select("[class=yxk-table]").select("td");

            // Read only complete rows that exist on this page.
            int rows = Math.min(ROWS_PER_PAGE, cells.size() / CELLS_PER_ROW);
            for (int row = 0; row < rows; row++) {
                dataDao.insert(toUniv(cells, row * CELLS_PER_ROW));
            }

            // Progress indicator: offset of the page just processed.
            System.out.println(page);
        }
    }

    /** Builds one record from the nine consecutive cells starting at index {@code base}. */
    private static Univ toUniv(Elements cells, int base) {
        Univ result = new Univ();
        result.setName(cells.get(base).text());
        result.setCity(cells.get(base + 1).text());
        result.setManage(cells.get(base + 2).text());
        result.setSchool_type(cells.get(base + 3).text());
        result.setBatch(cells.get(base + 4).text());
        // Cells 5-7 are stored as presence flags: any text means "1".
        result.setDf_universities(flag(cells.get(base + 5).hasText()));
        result.setDf_disciplines(flag(cells.get(base + 6).hasText()));
        result.setGraduate_school(flag(cells.get(base + 7).hasText()));
        result.setSatisfaction(cells.get(base + 8).text());
        return result;
    }

    /** Maps a presence check to the stored flag value: "1" when present, otherwise "0". */
    private static String flag(boolean present) {
        return present ? "1" : "0";
    }
}

  • 3
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值