最近接到一个任务对800条数据做数据映射,800多条数据存储在oracle中的 zxy_department_text中,映射表在excel中
zxy_department_test表 其子段EFFPREFECTURECODE没有值,需要对照一个文档,根据orgname查找excel文档和orgname匹配的中文名对应的码,
例如第一个name可以拆出河北省,在excel中找到码为130000 填入到zxy_department_test表
zxy_department_test表
orgid | orgname | EFFPREFECTURECODE |
1111 | 河北省中心支公司本部 | 130000 |
1222 | 北京市通州区支公司 |
|
文档excel内容局部展示:
130000 | 河北省 |
|
|
130100 |
| 石家庄市 |
|
130102 |
|
| 长安区 |
130104 |
|
| 桥西区 |
130105 |
|
| 新华区 |
130107 |
|
| 井陉矿区 |
130108 |
|
| 裕华区 |
1:我能想到的第一种方式是写代码(最差的方:800条用了30分钟,太差了)
先将excel读取出来存入list中
package com.zxy.newTest;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
//dmuser
public class load_excl {
List
lis = new ArrayList
();
public List
findlist() {
try {
Workbook book = Workbook.getWorkbook(new File("D://县及县以上行政区划分代码.xls"));
// 获得第一个工作表对象
Sheet sheet = book.getSheet(0);
// 得到第一列第一行的单元格
System.out.println("数据添加到列表begin");
for (int i = 0; i < 3224; i++) {
int stat = 3;
Cell cell1 = sheet.getCell(0, i + 2);
Cell cell2 = sheet.getCell(3, i + 2);
String result = cell1.getContents();
String result2 = cell2.getContents();
if (result == null || result == "" || "".equals(result)) {
break;
}
if (result2 == null || result2 == "" || "".equals(result2)) {
cell2 = sheet.getCell(2, i + 2);
result2 = cell2.getContents();
stat = 2;
if (result2 == null || result2 == "" || "".equals(result2)) {
cell2 = sheet.getCell(1, i + 2);
result2 = cell2.getContents();
stat = 1;
}
}
String pid = "0";
switch (stat) {
case 1:
pid = "0";
break;
case 2:
pid = result.toString().substring(0, 2) + "0000";
break;
case 3:
pid = result.toString().substring(0, 4) + "00";
break;
}
lis.add(result + "#" + result2 + "#" + pid + "#" + stat);
}
System.out.println("数据添加到列表end");
book.close();
} catch (Exception e) {
System.out.println(e);
}
return lis;
}
}
package com.zxy.newTest;
import com.oracle.deploy.update.UpdateCheck;
import javax.sql.DataSource;
import java.sql.*;
import java.util.List;
import java.util.Map;
/**
* Created by Administrator on 2017/7/31.
*/
public class OracleConnect {
//数据库驱动对象
public static final String DRIVER = "oracle.jdbc.driver.OracleDriver";
//数据库连接地址(数据库名)jdbc:oracle:thin:@ip:1521:liferpt
public static final String URL = "jdbc:oracle:thin:@ip:1521:liferpt";
//登陆名
public static final String USER = "user";
//登陆密码
public static final String PWD = "password";
//创建数据库连接对象
private Connection con = null;
private Connection con1 = null;
private Connection con2 = null;
//创建数据库预编译对象
private PreparedStatement ps = null;
private PreparedStatement ps1 = null;
private PreparedStatement ps2 = null;
//创建结果集
private ResultSet rs = null;
private ResultSet rs1 = null;
private ResultSet rs2 = null;
//创建数据源对象
public static DataSource source = null;
// //静态代码块
// static{
//
// //初始化配置文件context
// try {
// Context context=new InitialContext();
// source=(DataSource)context.lookup("java:comp/env/jdbc/webmessage");
// } catch (Exception e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
//
//
// }
/**
* 获取数据库连接
*/
public Connection getCon(Connection c) {
try {
Class.forName(DRIVER);
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
try {
c = DriverManager.getConnection(URL, USER, PWD);
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return c;
}
/**
* 关闭所有资源
*/
public void closeAll(ResultSet r, PreparedStatement p, Connection c) {
if (r != null)
try {
r.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if (p != null)
try {
p.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if (c != null)
try {
c.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void findsql(String sql) {
con = getCon(con);
try {
ps = con.prepareStatement(sql);// 实例化预编译语句
rs = ps.executeQuery();
while (rs.next()) {
// 当结果集不为空时
String upName = "%" + findname(rs.getString("orgname")) + "%";
//转换code
String code = findcode(upName);
System.out.println(rs.getString("orgname")+";-----:"+upName+":-----:"+code);
//更改表的数据
UpdateCode(rs.getString("orgid"), code);
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
closeAll(rs, ps, con);
}
}
public String findname(String name) {
if (name.contains("香港") || name.contains("澳门")) {
name.substring(0, 2);
} else if(name.contains("作废")){
name = "未知";
}else if (name.contains("经济技术开发区")){
name = name.split("经济技术开发区")[0];
}
else if(name.contains("市)") && name.contains("(")){
name = name.split("\\)")[0].split("\\(")[1];
}
if(name.contains("中心支公司")){
name = name.split("中心支公司")[0];
}
else if(name.contains("本部营销服务部")){
name = name.split("本部营销服务部")[0];
} if(name.contains("本部")){
name = name.split("本部")[0];
}
if(name.contains("支公司")){
name = name.split("支公司")[0];
}
if (name.contains("州")) {
name = name.split("州")[0] + "州";
}
if (name.contains("省")) {
name = name.split("省")[0] + "省";
}
if (name.contains("区")) {
if (name.equals("市辖区")) {
name = "未知";
} else if (name.contains("市辖区")) {
name = name.split("市辖区")[0];
}else if (name.contains("地区")) {
name = name.split("地区")[0];
} else {
name = name.split("区")[0] + "区";
}
if (name.contains("市") && name.contains("区")) {
if(name.equals("开发区")){
name = name.split("市")[0];
}else {
name = name.split("市")[1];
}
}
}
if (name.contains("市")) {
name = name.split("市")[0] + "市";
} else if (name.contains("县")) {
name = name.split("县")[0] + "县";
}
if (name.contains("分公司")) {
name = name.split("分公司")[0];
}
return name;
}
//疯了,设置这么多变量,有没有简单的办法呀
public void UpdateCode(String id, String code) {
con2 = getCon(con2);
try {
String sql = "update zxy_department_test set EFFPREFECTURECODE=? where orgid=" + id;
ps2 = con2.prepareStatement(sql);
ps2.setString(1, code);
ps2.executeUpdate();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
closeAll(rs2, ps2, con2);
}
}
public String findcode(String name) {
String code = "";
con1 = getCon(con1);
try {
String sql = "select * from zxy_department1 where name like ?";// 预编译语句,“?”代表参数
ps1 = con1.prepareStatement(sql);
ps1.setString(1, name);// 设置参数,前面的1表示参数的索引,而不是表中列名的索引
rs1 = ps1.executeQuery();
while (rs1.next()) {
code = findname(rs1.getString("did"));
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
closeAll(rs1, ps1, con1);
}
return code;
}
//数据导入到表
public void excel_loadDB() {
load_excl excl = new load_excl();
con = getCon(con);
try {
String sql = ("insert into zxy_department1(did,name,pid,lev) values (?,?,?,?)");
ps = con.prepareStatement(sql.toString());
List
list = excl.findlist();
for (int i = 0; i < list.size(); i++) {
String exLog = list.get(i);
String[] tt = exLog.split("#");
ps.setString(1, tt[0]);
ps.setString(2, tt[1]);
ps.setString(3, tt[2]);
ps.setString(4, tt[3]);
// 把一个SQL命令加入命令列表
ps.addBatch();
if (i % 1000 == 0) {
ps.executeBatch();
con.commit();
System.out.println("i:" + i);
}
}
// 执行批量更新
ps.executeBatch();
// 语句执行完毕,提交本事务
con.commit();
System.out.println("入库完毕");
} catch (Exception e) {
e.printStackTrace();
} finally {
closeAll(rs, ps, con);
}
}
}
package com.zxy.newTest;
import java.sql.*;
/**
* Created by smile on 2017/7/29.
*/
public class select {
public static void main(String args[]) throws SQLException {
OracleConnect oral = new OracleConnect();
oral.findsql("select * from zxy_department_test t WHERE T.EFFPREFECTURECODE IS NULL");
// oral.excel_loadDB();
}
}
package com.zxy.newTest;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
//dmuser
public class load_excl {
List
lis = new ArrayList
();
public List
findlist() {
try {
Workbook book = Workbook.getWorkbook(new File("D://县及县以上行政区划分代码.xls"));
// 获得第一个工作表对象
Sheet sheet = book.getSheet(0);
// 得到第一列第一行的单元格
System.out.println("数据添加到列表begin");
for (int i = 0; i < 3224; i++) {
int stat = 3;
Cell cell1 = sheet.getCell(0, i + 2);
Cell cell2 = sheet.getCell(3, i + 2);
String result = cell1.getContents();
String result2 = cell2.getContents();
if (result == null || result == "" || "".equals(result)) {
break;
}
if (result2 == null || result2 == "" || "".equals(result2)) {
cell2 = sheet.getCell(2, i + 2);
result2 = cell2.getContents();
stat = 2;
if (result2 == null || result2 == "" || "".equals(result2)) {
cell2 = sheet.getCell(1, i + 2);
result2 = cell2.getContents();
stat = 1;
}
}
String pid = "0";
switch (stat) {
case 1:
pid = "0";
break;
case 2:
pid = result.toString().substring(0, 2) + "0000";
break;
case 3:
pid = result.toString().substring(0, 4) + "00";
break;
}
lis.add(result + "#" + result2 + "#" + pid + "#" + stat);
}
System.out.println("数据添加到列表end");
book.close();
} catch (Exception e) {
System.out.println(e);
}
return lis;
}
}
package com.zxy.newTest;
import com.oracle.deploy.update.UpdateCheck;
import javax.sql.DataSource;
import javax.swing.text.MaskFormatter;
import java.sql.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by Administrator on 2017/7/31.
*/
public class OracleConnect {
//数据库驱动对象
public static final String DRIVER = "oracle.jdbc.driver.OracleDriver";
//数据库连接地址(数据库名)jdbc:oracle:thin:@ip:1521:liferpt
public static final String URL = "jdbc:oracle:thin:@ip:1521:liferpt";
//登陆名
public static final String USER = "user";
//登陆密码
public static final String PWD = "password";
//创建数据库连接对象
private Connection con = null;
private Connection con1 = null;
private Connection con2 = null;
//创建数据库预编译对象
private PreparedStatement ps = null;
private PreparedStatement ps1 = null;
private PreparedStatement ps2 = null;
//创建结果集
private ResultSet rs = null;
private ResultSet rs1 = null;
private ResultSet rs2 = null;
//创建数据源对象
public static DataSource source = null;
// //静态代码块
// static{
//
// //初始化配置文件context
// try {
// Context context=new InitialContext();
// source=(DataSource)context.lookup("java:comp/env/jdbc/webmessage");
// } catch (Exception e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
//
//
// }
/**
* 获取数据库连接
*/
public Connection getCon(Connection c) {
try {
Class.forName(DRIVER);
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
try {
c = DriverManager.getConnection(URL, USER, PWD);
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return c;
}
/**
* 关闭所有资源
*/
public void closeAll(ResultSet r, PreparedStatement p, Connection c) {
if (r != null)
try {
r.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if (p != null)
try {
p.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if (c != null)
try {
c.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void findsql(String sql) {
con = getCon(con);
try {
ps = con.prepareStatement(sql);// 实例化预编译语句
rs = ps.executeQuery();
int i=0;
Map
mm=new HashMap
();
while (rs.next()) {
// 当结果集不为空时
// System.out.println("继续添加");
i++;
String upName = rs.getString("orgname").trim();
System.out.print(upName);
//转换code
String code = findcode(upName);
System.out.println(":-----:"+code);
//更改表的数据
mm.put(rs.getString("orgid"), code);
if(i%1000==0){
UpdateCode(mm);
mm.clear();
System.out.println("准备修改数据传送第"+i+"条");
}
}
UpdateCode(mm);
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
closeAll(rs, ps, con);
}
}
public String findcode(String name) {
//这里面就可以直接用map判断了,可以在这里分多级判断,但是本次的数据多级的少也不好切分就不判断了,只在最后转码
if (name.contains("香港") || name.contains("澳门")) {
name= name.substring(0, 2);
} else if(name.contains("作废")){
name = "未知";
}else if (name.contains("经济技术开发区")){
name = name.split("经济技术开发区")[0];
}
else if(name.contains("市)") && name.contains("(")){
name = name.split("\\)")[0].split("\\(")[1];
}
if(name.contains("中心支公司")){
name = name.split("中心支公司")[0];
}
else if(name.contains("本部营销服务部")){
name = name.split("本部营销服务部")[0];
} if(name.contains("本部")){
name = name.split("本部")[0];
}
if(name.contains("支公司")){
name = name.split("支公司")[0];
}
if (name.contains("州")) {
name = name.split("州")[0] + "州";
}
if (name.contains("省")) {
name = name.split("省")[0] + "省";
}
if (name.contains("区")) {
if (name.equals("市辖区")) {
name = "未知";
} else if (name.contains("市辖区")) {
name = name.split("市辖区")[0];
}else if (name.contains("地区")) {
name = name.split("地区")[0];
} else {
name = name.split("区")[0] + "区";
}
if (name.contains("市") && name.contains("区")) {
if(name.equals("开发区")){
name = name.split("市")[0];
}else {
name = name.split("市")[1];
}
}
}
if (name.contains("市")) {
name = name.split("市")[0] + "市";
} else if (name.contains("县")) {
name = name.split("县")[0] + "县";
}
if (name.contains("分公司")) {
name = name.split("分公司")[0];
}
System.out.print(":-----"+name);
return map.get(name);
}
//疯了,设置这么多变量,有没有简单的办法呀
public void UpdateCode(Map
Mas) {
con2 = getCon(con2);
try {
String sql = "update zxy_department_test set EFFPREFECTURECODE=? where orgid=?";
ps2 = con2.prepareStatement(sql);
for (String key:Mas.keySet()){
ps2.setString(1, key);
ps2.setString(2, Mas.get(key));
ps2.addBatch();
}
ps2.executeBatch();
System.out.println("修改完成一批数据");
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
closeAll(rs2, ps2, con2);
}
}
//数据导入到表
public void excel_loadDB() {
load_excl excl = new load_excl();
con = getCon(con);
try {
String sql = ("insert into zxy_department1(did,name,pid,lev) values (?,?,?,?)");
ps = con.prepareStatement(sql.toString());
List
list = excl.findlist();
for (int i = 0; i < list.size(); i++) {
String exLog = list.get(i);
String[] tt = exLog.split("#");
ps.setString(1, tt[0]);
ps.setString(2, tt[1]);
ps.setString(3, tt[2]);
ps.setString(4, tt[3]);
// 把一个SQL命令加入命令列表
ps.addBatch();
if (i % 1000 == 0) {
ps.executeBatch();
con.commit();
System.out.println("i:" + i);
}
}
// 执行批量更新
ps.executeBatch();
// 语句执行完毕,提交本事务
con.commit();
System.out.println("入库完毕");
} catch (Exception e) {
e.printStackTrace();
} finally {
closeAll(rs, ps, con);
}
}
static Map
map=new HashMap
(); //数据读入到map中,Map
<名,码>
public Map
getmap(String sql){ con = getCon(con); try { System.out.println("录入map开始"); ps = con.prepareStatement(sql);// 实例化预编译语句 rs = ps.executeQuery(); while (rs.next()) { map.put(rs.getString("name").trim(),rs.getString("did").trim()); } System.out.println("录入map完毕"); } catch (Exception e) { e.printStackTrace(); } finally { closeAll(rs, ps, con); } return map; } }
package com.zxy.newTest;
import java.sql.*;
/**
* Created by smile on 2017/7/29.
*/
public class select {
public static void main(String args[]) throws SQLException {
long start=System.currentTimeMillis(); //获取开始时间
OracleConnect oral = new OracleConnect();
oral.getmap("select * from zxy_department1");
oral.findsql("select * from zxy_department_test t ");
// oral.excel_loadDB();
long end=System.currentTimeMillis(); //获取结束时间
System.out.println("程序运行时间: "+(end-start)+"ms");
}
}
但是map的模糊匹配是个问题 ,可以解决,使用key循环,包含比对 800条记录的运行时间-----》程序运行时间:29967ms
由原先的30min缩减到29s
当适当的调整批处理的条数时时间也在减少21879ms-》21s
3:换一种方式,直接在数据库中操作(plsql) 20s
plsql支持文档导入
接下来是程序部分
创建函数:findname(),函数写的有点low
create or replace function findname(orgnamein varchar2)
return varchar2
as
name_dep varchar2(50) := orgname;
begin
if instr(name_dep,'作废')>0
then
name_dep:='未知';
elsif instr(name_dep,'经济技术开发区')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'经济技术开发区')-1);
elsif (instr(name_dep,'市)')>0 and instr(name_dep,'(')>0)
then
name_dep:=substr(name_dep,0,instr(name_dep,')')-1);
name_dep:=substr(name_dep,instr(name_dep,'(')+1);
elsif instr(name_dep,'中心支公司')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'中心支公司')-1);
elsif instr(name_dep,'本部营销服务部')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'本部营销服务部')-1);
end if;
if instr(name_dep,'本部')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'本部')-1);
end if;
If instr(name_dep,'支公司')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'支公司')-1);
end if;
if instr(name_dep,'州')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'州')-1);
end if;
if instr(name_dep,'省')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'省')-1);
end if;
if instr(name_dep,'区')>0
then
if (name_dep='市辖区')
then
name_dep:='未知';
elsif instr(name_dep,'地区')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'地区')-1);
elsif instr(name_dep,'区')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'区'));
elsif instr(name_dep,'市')>0
then
if instr(name_dep,'开发区')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'市'));
else
--要注意直接使用else不用then
name_dep:=substr(name_dep,instr(name_dep,'市')+1);
end if;
end if;
end if;
if instr(name_dep,'市')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'市'));
elsif instr(name_dep,'县')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'县'));
end if;
if instr(name_dep,'分公司')>0
then
name_dep:=substr(name_dep,0,instr(name_dep,'分公司')-1);
end if;
return '%'|| name_dep ||'%';
end findname;
修改语句,执行函数
update zxy_department_test a set (EFFPREFECTURECODE)=(select b.did from zxy_department1 b where b.name like findname(a.orgname) )
这种方式约20秒,看来还是直接操作数据库快呀
当然这中间还要各种测试函数删除函数
//删除函数
--DROP FUNCTION findname
//查询函数的状态status =VALID表示创建成功
--select object_name, object_type, status from user_objects where lower(object_name)='findname';
上面其实就是数据从文件导入到数据库,数据加工(映射)的两种形式
如果需要hadoop支持不太适合这种场景,但可以实现
但是可以将数据读入到hdfs文件中-》使用hive做数据关联,再通过sqoop将数据导入到oracle等关系型数据库(貌似有点麻烦)