Java + SQLite

1、maven工程配置jar依赖:

<dependencies>
    <dependency>
        <groupId>org.xerial</groupId>
        <artifactId>sqlite-jdbc</artifactId>
        <version>3.21.0.1</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <!-- jsoup HTML parser library @ https://jsoup.org/ -->
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.2</version>
    </dependency>
</dependencies>
package com.xuqun.archive;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Reads stored HTML pages from the SQLite table {@code mainData}, extracts the links found in
 * each page's {@code div.breadcrumb} element with jsoup, and inserts every link that is not yet
 * present into the {@code adressInfo} table.
 */
public class SQLiteJDBC {
    private static final String DRIVER_CLASS = "org.sqlite.JDBC";
    private static final String DB_URL = "jdbc:sqlite:D:\\DEV_SPACE\\archive_data\\SQLiteDataBase.db";

    /** Prefix prepended to every (site-relative) href taken from a breadcrumb link. */
    private static final String HEAD_URL = "https://jsoup.org";

    private static final String SELECT_PAGES_SQL = "select * from mainData";
    private static final String FIND_ADDRESS_SQL =
            "select * from adressInfo where adressName=? and adressUrl=?";
    private static final String INSERT_ADDRESS_SQL = "insert into adressInfo values(?,?,?)";

    /**
     * Opens the database, processes all stored pages and closes the connection again.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources closes the connection exactly once, even when func1 fails.
        try (Connection connection = createConnection()) {
            if (connection == null) {
                System.err.println("Could not open database: " + DB_URL);
                return;
            }
            func1(connection);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Walks over every row of {@code mainData}, parses its {@code data} column as HTML and stores
     * each breadcrumb link that is missing from {@code adressInfo}.
     *
     * <p>The connection is owned by the caller and is not closed here. SQL errors are printed and
     * end the processing.
     *
     * @param connection open connection to the SQLite database
     */
    private static void func1(Connection connection) {
        // NOTE(review): the id is never advanced, so every inserted row gets id '1'.
        // Confirm the intended id scheme before relying on this column.
        // TODO: the original draft also prepared (but never executed) an insert into
        // adressRelationship; it is left out until the relationship logic is defined.
        int nodeId = 1;

        // The page query and the per-link lookups use separate statements: running another
        // query on a Statement closes the ResultSet it produced earlier, which would end the
        // outer iteration after the first link.
        try (Statement pageStatement = connection.createStatement();
                PreparedStatement findAddress = connection.prepareStatement(FIND_ADDRESS_SQL);
                PreparedStatement insertAddress = connection.prepareStatement(INSERT_ADDRESS_SQL);
                ResultSet pages = pageStatement.executeQuery(SELECT_PAGES_SQL)) {
            while (pages.next()) {
                String dataInfo = pages.getString("data");
                System.out.println("dataInfo:  " + dataInfo);
                if (dataInfo == null) {
                    continue; // nothing to parse for this row
                }

                Document doc = Jsoup.parse(dataInfo);
                Element masthead = doc.select("div.breadcrumb").first();
                if (masthead == null) {
                    continue; // page has no breadcrumb navigation
                }

                Elements links = masthead.select("a");
                for (Element link : links) {
                    String linkText = link.text();
                    String linkUrl = HEAD_URL + link.attr("href");
                    if (!addressExists(findAddress, linkText, linkUrl)) {
                        insertAddress.setString(1, String.valueOf(nodeId));
                        insertAddress.setString(2, linkText);
                        insertAddress.setString(3, linkUrl);
                        insertAddress.executeUpdate();
                    }
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Checks whether {@code adressInfo} already holds a row with the given name and URL.
     *
     * @param findAddress prepared lookup statement built from {@link #FIND_ADDRESS_SQL}
     * @param name link text to look for
     * @param url absolute link URL to look for
     * @return {@code true} if at least one matching row exists
     * @throws SQLException if the lookup fails
     */
    private static boolean addressExists(PreparedStatement findAddress, String name, String url)
            throws SQLException {
        findAddress.setString(1, name);
        findAddress.setString(2, url);
        try (ResultSet matches = findAddress.executeQuery()) {
            return matches.next();
        }
    }

    /**
     * Creates the SQLite database connection.
     *
     * @return an open connection, or {@code null} if the driver class cannot be loaded or the
     *         database cannot be opened (the error is printed)
     */
    private static Connection createConnection() {
        try {
            Class.forName(DRIVER_CLASS);
            return DriverManager.getConnection(DB_URL);
        } catch (ClassNotFoundException | SQLException e) {
            e.printStackTrace();
            return null;
        }
    }
}
package com.xuqun.archive;

import java.io.File;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


/**
 * Downloads a jsoup cookbook page and prints the document, its {@code div.breadcrumb} element
 * and the target and text of every link inside that element.
 *
 *   @author:xuqun
 *   @time:2018-03-27 09:09:42 AM
 */
public class TraverseHTML {

    /**
     * Fetches the page over HTTP and prints its breadcrumb links to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "https://jsoup.org/cookbook/input/parse-body-fragment";
        // Prefix prepended to the site-relative hrefs of the breadcrumb links.
        String headUrl = "https://jsoup.org";

        try {
            Document doc = Jsoup.connect(url).get();
            System.out.println(doc);

            // first() yields null when the page contains no div.breadcrumb element.
            Element masthead = doc.select("div.breadcrumb").first();
            System.out.println(masthead);
            if (masthead == null) {
                System.err.println("No div.breadcrumb element found in " + url);
                return;
            }

            Elements links = masthead.select("a");
            for (Element link : links) {
                String linkHref = link.attr("href");
                String linkText = link.text();
                System.out.println("------------------------------------------------------");
                System.out.println("linkHref :" + headUrl + linkHref);
                System.out.println("linkText : " + linkText);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值