excel表数据入库到oracle,并做数据映射操作

最近接到一个任务对800条数据做数据映射,800多条数据存储在oracle中的 zxy_department_text中,映射表在excel中

zxy_department_test  其子段EFFPREFECTURECODE没有值,需要对照一个文档,根据orgname查找excel文档和orgname匹配的中文名对应的码,

例如第一个name可以拆出河北省,在excel中找到码为130000 填入到zxy_department_test表

  

 zxy_department_test表

 

orgid

orgname

EFFPREFECTURECODE

1111

河北省中心支公司本部

 130000

1222

北京市通州区支公司

 

 

文档excel内容局部展示:

 

130000

河北省

 

 

130100

 

  石家庄市

 

130102

 

 

    长安区

130104

 

 

    桥西区

130105

 

 

    新华区

130107

 

 

    井陉矿区

130108

 

 

    裕华区

 


1:我能想到的第一种方式是写代码(最差的方:800条用了30分钟,太差了)

先将excel读取出来存入list中

     

package com.zxy.newTest;


import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

//dmuser
public class load_excl {
    List
   
   
    
     lis = new ArrayList
    
    
     
     ();
    public List
     
     
      
       findlist() {
        try {
            Workbook book = Workbook.getWorkbook(new File("D://县及县以上行政区划分代码.xls"));
            // 获得第一个工作表对象
            Sheet sheet = book.getSheet(0);
            // 得到第一列第一行的单元格
            System.out.println("数据添加到列表begin");
            for (int i = 0; i < 3224; i++) {
                int stat = 3;
                Cell cell1 = sheet.getCell(0, i + 2);
                Cell cell2 = sheet.getCell(3, i + 2);

                String result = cell1.getContents();
                String result2 = cell2.getContents();

                if (result == null || result == "" || "".equals(result)) {
                    break;
                }
                if (result2 == null || result2 == "" || "".equals(result2)) {

                    cell2 = sheet.getCell(2, i + 2);
                    result2 = cell2.getContents();
                    stat = 2;
                    if (result2 == null || result2 == "" || "".equals(result2)) {
                        cell2 = sheet.getCell(1, i + 2);
                        result2 = cell2.getContents();
                        stat = 1;
                    }
                }
                String pid = "0";
                switch (stat) {
                    case 1:
                        pid = "0";
                        break;
                    case 2:
                        pid = result.toString().substring(0, 2) + "0000";
                        break;
                    case 3:
                        pid = result.toString().substring(0, 4) + "00";
                        break;

                }
                lis.add(result + "#" + result2 + "#" + pid + "#" + stat);
            }

            System.out.println("数据添加到列表end");
            book.close();
        } catch (Exception e) {
            System.out.println(e);
        }

        return lis;

    }
}





     
     
    
    
   
   

数据插入到库(批处理),读取全部待修改的记录,逐条拆分待匹配字段,拆出来的字段,逐条模糊查询,返回结果进行update操作

package com.zxy.newTest;

import com.oracle.deploy.update.UpdateCheck;

import javax.sql.DataSource;
import java.sql.*;
import java.util.List;
import java.util.Map;

/**
 * Created by Administrator on 2017/7/31.
 */
public class OracleConnect {


    //数据库驱动对象
    public static final String DRIVER = "oracle.jdbc.driver.OracleDriver";
    //数据库连接地址(数据库名)jdbc:oracle:thin:@ip:1521:liferpt
    public static final String URL = "jdbc:oracle:thin:@ip:1521:liferpt";
    //登陆名
    public static final String USER = "user";
    //登陆密码
    public static final String PWD = "password";
    //创建数据库连接对象
    private Connection con = null;
    private Connection con1 = null;
    private Connection con2 = null;
    //创建数据库预编译对象
    private PreparedStatement ps = null;
    private PreparedStatement ps1 = null;
    private PreparedStatement ps2 = null;
    //创建结果集
    private ResultSet rs = null;
    private ResultSet rs1 = null;
    private ResultSet rs2 = null;
    //创建数据源对象
    public static DataSource source = null;

//  //静态代码块
//  static{
//
//      //初始化配置文件context
//      try {
//          Context context=new InitialContext();
//          source=(DataSource)context.lookup("java:comp/env/jdbc/webmessage");
//      } catch (Exception e) {
//          // TODO Auto-generated catch block
//          e.printStackTrace();
//      }
//
//
//  }

    /**
     * 获取数据库连接
     */
    public Connection getCon(Connection c) {
        try {
            Class.forName(DRIVER);
        } catch (ClassNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        try {
            c = DriverManager.getConnection(URL, USER, PWD);
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        return c;
    }


    /**
     * 关闭所有资源
     */
    public void closeAll(ResultSet r, PreparedStatement p, Connection c) {
        if (r != null)
            try {
                r.close();
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        if (p != null)
            try {
                p.close();
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        if (c != null)
            try {
                c.close();
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
    }

    public void findsql(String sql) {
        con = getCon(con);
        try {
            ps = con.prepareStatement(sql);// 实例化预编译语句
            rs = ps.executeQuery();
            while (rs.next()) {
                // 当结果集不为空时

                String upName = "%" + findname(rs.getString("orgname")) + "%";
                //转换code

                String code = findcode(upName);
                System.out.println(rs.getString("orgname")+";-----:"+upName+":-----:"+code);
                //更改表的数据

                UpdateCode(rs.getString("orgid"), code);
            }
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();

        } finally {
            closeAll(rs, ps, con);
        }
    }

    public String findname(String name) {

        if (name.contains("香港") || name.contains("澳门")) {
            name.substring(0, 2);

        } else if(name.contains("作废")){
            name = "未知";
        }else if (name.contains("经济技术开发区")){
            name = name.split("经济技术开发区")[0];
        }
        else if(name.contains("市)") && name.contains("(")){
                name = name.split("\\)")[0].split("\\(")[1];
        }
        if(name.contains("中心支公司")){
            name = name.split("中心支公司")[0];
        }
        else if(name.contains("本部营销服务部")){
            name = name.split("本部营销服务部")[0];
        } if(name.contains("本部")){
            name = name.split("本部")[0];
        }
         if(name.contains("支公司")){
            name = name.split("支公司")[0];
        }
        if (name.contains("州")) {
            name = name.split("州")[0] + "州";
        }
         if (name.contains("省")) {
             name = name.split("省")[0] + "省";
        }
        if (name.contains("区")) {
            if (name.equals("市辖区")) {
                name = "未知";

            } else if (name.contains("市辖区")) {

                name = name.split("市辖区")[0];
            }else if (name.contains("地区")) {

                name = name.split("地区")[0];
            } else {
                name = name.split("区")[0] + "区";
            }
            if (name.contains("市") && name.contains("区")) {
                if(name.equals("开发区")){
                    name = name.split("市")[0];
                }else {
                    name = name.split("市")[1];
                }
            }

        }
        if (name.contains("市")) {

            name = name.split("市")[0] + "市";

        } else if (name.contains("县")) {
            name = name.split("县")[0] + "县";
        }
        if (name.contains("分公司")) {
            name = name.split("分公司")[0];
        }
        return name;

    }


    //疯了,设置这么多变量,有没有简单的办法呀
    public void UpdateCode(String id, String code) {
        con2 = getCon(con2);
        try {

            String sql = "update zxy_department_test set EFFPREFECTURECODE=?  where orgid=" + id;
            ps2 = con2.prepareStatement(sql);
            ps2.setString(1, code);
            ps2.executeUpdate();
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {
            closeAll(rs2, ps2, con2);
        }
    }

    public String findcode(String name) {
        String code = "";
        con1 = getCon(con1);
        try {
            String sql = "select * from zxy_department1 where name like ?";// 预编译语句,“?”代表参数
            ps1 = con1.prepareStatement(sql);
            ps1.setString(1, name);// 设置参数,前面的1表示参数的索引,而不是表中列名的索引
            rs1 = ps1.executeQuery();
            while (rs1.next()) {
                code = findname(rs1.getString("did"));
            }

        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();

        } finally {
            closeAll(rs1, ps1, con1);

        }

        return code;

    }

    //数据导入到表
    public void excel_loadDB() {
        load_excl excl = new load_excl();
        con = getCon(con);
        try {
            String sql = ("insert into zxy_department1(did,name,pid,lev) values (?,?,?,?)");
            ps = con.prepareStatement(sql.toString());
            List
    
    
     
      list = excl.findlist();

            for (int i = 0; i < list.size(); i++) {
                String exLog = list.get(i);
                String[] tt = exLog.split("#");
                ps.setString(1, tt[0]);
                ps.setString(2, tt[1]);
                ps.setString(3, tt[2]);
                ps.setString(4, tt[3]);

                // 把一个SQL命令加入命令列表
                ps.addBatch();

                if (i % 1000 == 0) {
                    ps.executeBatch();
                    con.commit();
                    System.out.println("i:" + i);
                }

            }
            // 执行批量更新
            ps.executeBatch();
            // 语句执行完毕,提交本事务
            con.commit();
            System.out.println("入库完毕");

        } catch (Exception e) {
            e.printStackTrace();

        } finally {
            closeAll(rs, ps, con);
        }
    }

}

    
    
整体的额调用
package com.zxy.newTest;


import java.sql.*;

/**
 * Created by smile on 2017/7/29.
 */
public class select {
    public static void main(String args[]) throws SQLException {

        OracleConnect oral = new OracleConnect();
          oral.findsql("select * from zxy_department_test t WHERE T.EFFPREFECTURECODE IS NULL");
       // oral.excel_loadDB();


    }


}

2:优化(做批处理,减少数据库访问开销,使用缓存   时间用来)

先将excel读取出来存入list中不用管
package com.zxy.newTest;


import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

//dmuser
public class load_excl {
    List
     
     
      
       lis = new ArrayList
      
      
       
       ();
    public List
       
       
        
         findlist() {
        try {
            Workbook book = Workbook.getWorkbook(new File("D://县及县以上行政区划分代码.xls"));
            // 获得第一个工作表对象
            Sheet sheet = book.getSheet(0);
            // 得到第一列第一行的单元格
            System.out.println("数据添加到列表begin");
            for (int i = 0; i < 3224; i++) {
                int stat = 3;
                Cell cell1 = sheet.getCell(0, i + 2);
                Cell cell2 = sheet.getCell(3, i + 2);

                String result = cell1.getContents();
                String result2 = cell2.getContents();

                if (result == null || result == "" || "".equals(result)) {
                    break;
                }
                if (result2 == null || result2 == "" || "".equals(result2)) {

                    cell2 = sheet.getCell(2, i + 2);
                    result2 = cell2.getContents();
                    stat = 2;
                    if (result2 == null || result2 == "" || "".equals(result2)) {
                        cell2 = sheet.getCell(1, i + 2);
                        result2 = cell2.getContents();
                        stat = 1;
                    }
                }
                String pid = "0";
                switch (stat) {
                    case 1:
                        pid = "0";
                        break;
                    case 2:
                        pid = result.toString().substring(0, 2) + "0000";
                        break;
                    case 3:
                        pid = result.toString().substring(0, 4) + "00";
                        break;

                }
                lis.add(result + "#" + result2 + "#" + pid + "#" + stat);
            }

            System.out.println("数据添加到列表end");
            book.close();
        } catch (Exception e) {
            System.out.println(e);
        }

        return lis;

    }
}





       
       
      
      
     
     

将要匹配的数据字典读入缓存,修改使用批处理

package com.zxy.newTest;

import com.oracle.deploy.update.UpdateCheck;

import javax.sql.DataSource;
import javax.swing.text.MaskFormatter;
import java.sql.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Created by Administrator on 2017/7/31.
 */
public class OracleConnect {


    //数据库驱动对象
    public static final String DRIVER = "oracle.jdbc.driver.OracleDriver";
    //数据库连接地址(数据库名)jdbc:oracle:thin:@ip:1521:liferpt
    public static final String URL = "jdbc:oracle:thin:@ip:1521:liferpt";
    //登陆名
    public static final String USER = "user";
    //登陆密码
    public static final String PWD = "password";
    //创建数据库连接对象
    private Connection con = null;
    private Connection con1 = null;
    private Connection con2 = null;
    //创建数据库预编译对象
    private PreparedStatement ps = null;
    private PreparedStatement ps1 = null;
    private PreparedStatement ps2 = null;
    //创建结果集
    private ResultSet rs = null;
    private ResultSet rs1 = null;
    private ResultSet rs2 = null;
    //创建数据源对象
    public static DataSource source = null;

//  //静态代码块
//  static{
//
//      //初始化配置文件context
//      try {
//          Context context=new InitialContext();
//          source=(DataSource)context.lookup("java:comp/env/jdbc/webmessage");
//      } catch (Exception e) {
//          // TODO Auto-generated catch block
//          e.printStackTrace();
//      }
//
//
//  }

    /**
     * 获取数据库连接
     */
    public Connection getCon(Connection c) {
        try {
            Class.forName(DRIVER);
        } catch (ClassNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        try {
            c = DriverManager.getConnection(URL, USER, PWD);
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        return c;
    }


    /**
     * 关闭所有资源
     */
    public void closeAll(ResultSet r, PreparedStatement p, Connection c) {
        if (r != null)
            try {
                r.close();
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        if (p != null)
            try {
                p.close();
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        if (c != null)
            try {
                c.close();
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
    }

    public void findsql(String sql) {
        con = getCon(con);
        try {
            ps = con.prepareStatement(sql);// 实例化预编译语句
            rs = ps.executeQuery();
            int i=0;
            Map
     
     
      
       mm=new HashMap
      
      
       
       ();
            while (rs.next()) {
                // 当结果集不为空时
               // System.out.println("继续添加");
         i++;
               String upName = rs.getString("orgname").trim();
                System.out.print(upName);
                //转换code
                String code = findcode(upName);
                System.out.println(":-----:"+code);
                //更改表的数据
                mm.put(rs.getString("orgid"), code);
                 if(i%1000==0){
                     UpdateCode(mm);
                     mm.clear();
                     System.out.println("准备修改数据传送第"+i+"条");
                 }

            }
            UpdateCode(mm);
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();

        } finally {
            closeAll(rs, ps, con);
        }
    }

    public String findcode(String name) {
        //这里面就可以直接用map判断了,可以在这里分多级判断,但是本次的数据多级的少也不好切分就不判断了,只在最后转码

        if (name.contains("香港") || name.contains("澳门")) {
           name= name.substring(0, 2);

        } else if(name.contains("作废")){
            name = "未知";
        }else if (name.contains("经济技术开发区")){
            name = name.split("经济技术开发区")[0];
        }
        else if(name.contains("市)") && name.contains("(")){
                name = name.split("\\)")[0].split("\\(")[1];
        }
        if(name.contains("中心支公司")){
            name = name.split("中心支公司")[0];
        }
        else if(name.contains("本部营销服务部")){
            name = name.split("本部营销服务部")[0];
        } if(name.contains("本部")){
            name = name.split("本部")[0];
        }
         if(name.contains("支公司")){
            name = name.split("支公司")[0];
        }
        if (name.contains("州")) {
            name = name.split("州")[0] + "州";
        }
         if (name.contains("省")) {
             name = name.split("省")[0] + "省";
        }
        if (name.contains("区")) {
            if (name.equals("市辖区")) {
                name = "未知";

            } else if (name.contains("市辖区")) {

                name = name.split("市辖区")[0];
            }else if (name.contains("地区")) {

                name = name.split("地区")[0];
            } else {
                name = name.split("区")[0] + "区";
            }
            if (name.contains("市") && name.contains("区")) {
                if(name.equals("开发区")){
                    name = name.split("市")[0];
                }else {
                    name = name.split("市")[1];
                }
            }

        }
        if (name.contains("市")) {

            name = name.split("市")[0] + "市";

        } else if (name.contains("县")) {
            name = name.split("县")[0] + "县";
        }
        if (name.contains("分公司")) {
            name = name.split("分公司")[0];
        }
        System.out.print(":-----"+name);
        return map.get(name);


    }


    //疯了,设置这么多变量,有没有简单的办法呀
    public void UpdateCode(Map
       
       
        
         Mas) {
        con2 = getCon(con2);
        try {

            String sql = "update zxy_department_test set EFFPREFECTURECODE=?  where orgid=?";
            ps2 = con2.prepareStatement(sql);
            for (String key:Mas.keySet()){
                ps2.setString(1, key);
                ps2.setString(2, Mas.get(key));
                ps2.addBatch();
            }
            ps2.executeBatch();
            System.out.println("修改完成一批数据");


        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {
            closeAll(rs2, ps2, con2);
        }
    }



    //数据导入到表
    public void excel_loadDB() {
        load_excl excl = new load_excl();
        con = getCon(con);
        try {
            String sql = ("insert into zxy_department1(did,name,pid,lev) values (?,?,?,?)");
            ps = con.prepareStatement(sql.toString());
            List
        
        
         
          list = excl.findlist();

            for (int i = 0; i < list.size(); i++) {
                String exLog = list.get(i);
                String[] tt = exLog.split("#");
                ps.setString(1, tt[0]);
                ps.setString(2, tt[1]);
                ps.setString(3, tt[2]);
                ps.setString(4, tt[3]);

                // 把一个SQL命令加入命令列表
                ps.addBatch();

                if (i % 1000 == 0) {
                    ps.executeBatch();
                    con.commit();
                    System.out.println("i:" + i);
                }

            }
            // 执行批量更新
            ps.executeBatch();
            // 语句执行完毕,提交本事务
            con.commit();
            System.out.println("入库完毕");

        } catch (Exception e) {
            e.printStackTrace();

        } finally {
            closeAll(rs, ps, con);
        }
    }

    static Map
         
         
           map=new HashMap 
          
            (); //数据读入到map中,Map 
           <名,码>
             public Map 
            
              getmap(String sql){ con = getCon(con); try { System.out.println("录入map开始"); ps = con.prepareStatement(sql);// 实例化预编译语句 rs = ps.executeQuery(); while (rs.next()) { map.put(rs.getString("name").trim(),rs.getString("did").trim()); } System.out.println("录入map完毕"); } catch (Exception e) { e.printStackTrace(); } finally { closeAll(rs, ps, con); } return map; } } 
             
            
           
         
        
        
       
       
      
      
     
     

整体调用
package com.zxy.newTest;


import java.sql.*;

/**
 * Created by smile on 2017/7/29.
 */
public class select {
    public static void main(String args[]) throws SQLException {
        long start=System.currentTimeMillis();   //获取开始时间
        OracleConnect oral = new OracleConnect();
        oral.getmap("select * from zxy_department1");
        oral.findsql("select * from zxy_department_test t ");
       // oral.excel_loadDB();
        long end=System.currentTimeMillis(); //获取结束时间
        System.out.println("程序运行时间: "+(end-start)+"ms");

    }


}

但是map的模糊匹配是个问题 ,可以解决,使用key循环,包含比对   800条记录的运行时间-----》程序运行时间:29967ms

由原先的30min缩减到29s

当适当的调整批处理的条数时时间也在减少21879ms-21s

3:换一种方式,直接在数据库中操作(plsql) 20s

plsql支持文档导入

接下来是程序部分

  创建函数:findname(),函数写的有点low

create or replace function findname(orgnamein varchar2)

return varchar2 

as

 name_dep varchar2(50) := orgname;

begin

        if instr(name_dep,'作废')>0

          then

          name_dep:='未知';

        elsif  instr(name_dep,'经济技术开发区')>0

          then 

          name_dep:=substr(name_dep,0,instr(name_dep,'经济技术开发区')-1);

         elsif  (instr(name_dep,')')>0 and instr(name_dep,'(')>0)

          then 

           name_dep:=substr(name_dep,0,instr(name_dep,')')-1);

           name_dep:=substr(name_dep,instr(name_dep,'(')+1);

         elsif  instr(name_dep,'中心支公司')>0

          then 

          name_dep:=substr(name_dep,0,instr(name_dep,'中心支公司')-1);

        elsif  instr(name_dep,'本部营销服务部')>0

          then 

          name_dep:=substr(name_dep,0,instr(name_dep,'本部营销服务部')-1);

        end if;

      

        if instr(name_dep,'本部')>0

          then 

          name_dep:=substr(name_dep,0,instr(name_dep,'本部')-1);

        end if;

        If instr(name_dep,'支公司')>0

          then 

          name_dep:=substr(name_dep,0,instr(name_dep,'支公司')-1);

        end if;

        if instr(name_dep,'')>0

            then

            name_dep:=substr(name_dep,0,instr(name_dep,'')-1);

        end if;

        if instr(name_dep,'')>0

         then

          name_dep:=substr(name_dep,0,instr(name_dep,'')-1);

        end if;

        if instr(name_dep,'')>0

        then

          

           if (name_dep='市辖区')

            then

              name_dep:='未知';

           elsif  instr(name_dep,'地区')>0

            then

               name_dep:=substr(name_dep,0,instr(name_dep,'地区')-1);

           elsif  instr(name_dep,'')>0

            then

                 name_dep:=substr(name_dep,0,instr(name_dep,''));

          

           elsif instr(name_dep,'')>0 

            then

               if instr(name_dep,'开发区')>0 

               then

                 name_dep:=substr(name_dep,0,instr(name_dep,''));

               else

                  --要注意直接使用else不用then

                 name_dep:=substr(name_dep,instr(name_dep,'')+1);  

               end if;  

          end if;

          

        end if;

        if instr(name_dep,'')>0

        then

        name_dep:=substr(name_dep,0,instr(name_dep,''));

        elsif  instr(name_dep,'')>0

        then

        name_dep:=substr(name_dep,0,instr(name_dep,''));

        end if;

        if instr(name_dep,'分公司')>0

        then 

        name_dep:=substr(name_dep,0,instr(name_dep,'分公司')-1);

        end if;

       

        return '%'|| name_dep ||'%';

end findname;

   修改语句,执行函数

update  zxy_department_test a set (EFFPREFECTURECODE)=(select b.did from zxy_department1 b where b.name like findname(a.orgname) )


这种方式约20秒,看来还是直接操作数据库快呀

  当然这中间还要各种测试函数删除函数

//删除函数

--DROP FUNCTION findname

//查询函数的状态status =VALID表示创建成功

--select object_name, object_type, status from user_objects where lower(object_name)='findname';

   


上面其实就是数据从文件导入到数据库,数据加工(映射)的两种形式

如果需要hadoop支持不太适合这种场景,但可以实现

但是可以将数据读入到hdfs文件中-》使用hive做数据关联,再通过sqoop将数据导入到oracle等关系型数据库(貌似有点麻烦)  




















评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值