要求:解析json文件,取出其中的参数,修改数据库中的数据
数据量:190万条
使用线程池批量处理sql
1、线程:
/**
 * Worker that processes one slice of the parsed SQL statements.
 *
 * Each instance claims a unique 1-based slice number from BatchUpdateData,
 * fetches the statements belonging to that slice, and executes them in
 * JDBC batches. Extends Thread (rather than implementing Runnable) because
 * callers declare it as {@code Thread}; it is still submitted to the pool
 * as a plain Runnable.
 */
public class DateHandleThread extends Thread {
    @Override
    public void run() {
        // Claim this worker's unique slice number — the per-thread partition key.
        Integer sliceNo = BatchUpdateData.getLine();
        // Fetch only the statements assigned to that slice.
        ArrayList<String> mySql = BatchUpdateData.getSqlData(sliceNo);
        BatchUpdateData.runSql(mySql);
    }
}
2、BatchUpdateData.java
/**
 * Parses JSON files under {@link #folderPath}, builds UPDATE statements from
 * the extracted fields, and executes them on a fixed thread pool in JDBC
 * batches of {@link #batchHandleNum}.
 *
 * Flow: main() -> doJob() reads every file single-threaded into sqlList,
 * then starts {@code threadNum} workers; each worker claims a disjoint slice
 * via getLine()/getSqlData() and executes it with runSql(). Failed batches
 * are queued and persisted asynchronously by the MsgInfo timer task.
 */
public class BatchUpdateData {
    // Next slice number to hand out; workers claim distinct 1-based slices.
    private static AtomicInteger line = new AtomicInteger(0);
    static int threadNum = 4;                                     // number of worker threads
    static String folderPath = "C:\\Users\\admin\\Desktop\\phone"; // folder containing the JSON files
    static int batchHandleNum = 2000;                             // statements per JDBC batch; batching greatly improves throughput
    // Database configuration.
    static String url = "";
    static String user = "";
    static String password = "";
    static ExecutorService pool = Executors.newFixedThreadPool(threadNum);
    // All UPDATE statements parsed from the JSON files. Filled single-threaded
    // in doJob(), then read-only while the workers run.
    static ArrayList<String> sqlList = new ArrayList<>();
    // Failed-batch SQL text awaiting asynchronous persistence.
    // TODO replace the String payload with a proper log-record type.
    protected static ConcurrentLinkedQueue<String> queue = new ConcurrentLinkedQueue<>();
    private static Boolean isInit = Boolean.FALSE;
    public static Boolean DateThreadisRes = Boolean.FALSE; // set TRUE once the pool has drained
    // NOTE: SimpleDateFormat is NOT thread-safe. It is only used from the
    // single-threaded parsing phase (json()), which keeps this safe — do not
    // call it from the workers.
    static SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    public static void main(String[] args) throws Exception {
        Timer timer = new Timer(true);
        // TODO Timer can hang if the system clock changes; ScheduledExecutorService is more robust.
        timer.schedule(new MsgInfo(), 1000L, 1000L); // flush the failure queue once per second
        long starttime = System.currentTimeMillis();
        BatchUpdateData.doJob();
        try {
            // Poll every 10 seconds until every pool worker has finished.
            while (!pool.awaitTermination(10, TimeUnit.SECONDS)) ;
            DateThreadisRes = Boolean.TRUE; // tell MsgInfo the workers are done
            long spendtime = System.currentTimeMillis() - starttime;
            System.out.println("数据处理线程池执行结束,处理成功,耗时:" + spendtime / 1000 + "秒");
            // A non-empty queue means failure logs are still being persisted.
            if (queue.size() > 0) {
                System.out.println("当前队列剩余:" + queue.size() + "。主线程将休眠:" + queue.size() + "s,若执行完毕可手动关闭!");
                Thread.sleep(queue.size() * 1000L); // worst case: one entry persisted per second
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the interrupt flag
            e.printStackTrace();
        }
    }

    /**
     * Hands out the next 1-based slice number. Thread-safe; each worker thread
     * calls this exactly once to claim its partition.
     */
    public static int getLine() {
        return line.addAndGet(1);
    }

    /**
     * Returns the sub-list of sqlList assigned to slice {@code num} (1-based).
     * Slices are contiguous ranges of size {@code sqlList.size() / threadNum};
     * the last slice additionally absorbs the division remainder.
     */
    public static ArrayList<String> getSqlData(int num) {
        int piece = sqlList.size() / threadNum; // statements per regular slice
        int start = piece * (num - 1);
        // The last slice runs to the end of the list to pick up the remainder.
        int end = (num == threadNum) ? sqlList.size() : num * piece;
        System.out.println("第" + num + "个线程处理数据范围为:[" + start + "," + end + ")");
        ArrayList<String> tempSql = new ArrayList<>(end - start); // presized, no raw type
        for (int i = start; i < end; i++) {
            tempSql.add(sqlList.get(i));
        }
        return tempSql;
    }

    /**
     * Opens a new database connection.
     * @return the connection, or {@code null} if the driver load or the
     *         connection attempt failed (the failure is printed).
     */
    public static Connection getConnect() {
        Connection con = null;
        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
            con = DriverManager.getConnection(url, user, password);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return con;
    }

    /**
     * Executes this worker's statements in batches of {@code batchHandleNum}.
     *
     * FIX: the original flushed on {@code i == stringList.size()} — never true
     * inside the loop, so the tail of every slice was silently dropped — and on
     * {@code i % batchHandleNum == 0}, which fires at i == 0 and executes a
     * batch of one. It also leaked the Statement and NPE'd in finally when the
     * connection could not be opened.
     */
    public static void runSql(ArrayList<String> stringList) {
        Connection con = getConnect();
        if (con == null) {
            // No connection: record the whole slice as failed instead of throwing NPE.
            StringBuilder all = new StringBuilder();
            for (String sql : stringList) {
                all.append(sql).append(";").append(System.getProperty("line.separator"));
            }
            logs(all.toString());
            queue.add(all.toString());
            return;
        }
        try (Statement st = con.createStatement()) { // closes the statement (original leaked it)
            StringBuilder sb = new StringBuilder();
            int pending = 0; // statements added since the last flush
            for (int i = 0; i < stringList.size(); i++) {
                st.addBatch(stringList.get(i));
                sb.append(stringList.get(i)).append(";").append(System.getProperty("line.separator"));
                pending++;
                if (pending == batchHandleNum) { // full batch: execute it
                    System.out.println("一次批量导入的sql语句有:" + pending + "条");
                    dealDate(st, sb);
                    pending = 0;
                }
            }
            if (pending > 0) { // flush the remainder (< batchHandleNum statements)
                dealDate(st, sb);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                con.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Executes the accumulated batch. On failure the raw SQL text is written
     * to the error file and queued for asynchronous persistence; the buffer
     * is cleared either way so the next batch starts fresh.
     */
    private static void dealDate(Statement st, StringBuilder sb) {
        try {
            st.executeBatch();
            sb.setLength(0); // clear for the next batch
        } catch (Exception e) {
            System.out.println("执行异常:异常原因:" + e.getMessage());
            logs(sb.toString()); // persist the failed statements immediately
            // TODO could also go to a queue / MQ / Kafka for async handling
            queue.add(sb.toString());
            sb.setLength(0);
        }
    }

    /**
     * Appends failure text to err.sql under the data folder.
     *
     * FIX: the original concatenated {@code folderPath + "err.sql"} without a
     * path separator (writing to "...phoneerr.sql") and silently swallowed
     * IOException — losing the error log with no trace.
     */
    public static void logs(String text) {
        try (BufferedWriter out = new BufferedWriter(
                new FileWriter(folderPath + File.separator + "err.sql", true))) { // append mode
            out.write(text);
        } catch (IOException e) {
            e.printStackTrace(); // do not swallow: a lost error log hides data loss
        }
    }

    /**
     * Reads every JSON file single-threaded, then starts the workers and shuts
     * the pool down so awaitTermination() in main() can observe completion.
     * @throws Exception propagated from the file walk
     */
    public static void doJob() throws Exception {
        readFile(folderPath);
        System.out.println("解析到待处理sql数量:" + sqlList.size());
        for (int i = 0; i < threadNum; i++) {
            // Each worker claims its own slice via getLine()/getSqlData().
            pool.execute(new DateHandleThread());
        }
        pool.shutdown(); // no new tasks; lets awaitTermination() return true when done
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * FIX: the original also opened an unused FileReader that leaked on any
     * exception and read one char at a time; try-with-resources plus a buffer
     * closes the stream reliably and reads efficiently.
     *
     * @return the file content, or {@code null} on I/O error
     */
    public static String readJsonFile(String fileName) {
        try (Reader reader = new InputStreamReader(new FileInputStream(fileName), "utf-8")) {
            StringBuilder sb = new StringBuilder();
            char[] buf = new char[8192];
            int n;
            while ((n = reader.read(buf)) != -1) {
                sb.append(buf, 0, n);
            }
            return sb.toString();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Recursively walks {@code filepath}; every regular file found is handed
     * to {@link #json(String)} for parsing. Uses File.separator so the walk
     * also works on non-Windows systems.
     */
    public static boolean readFile(String filepath) throws FileNotFoundException, IOException {
        try {
            File file = new File(filepath);
            if (!file.isDirectory()) {
                System.out.println("文件");
                System.out.println("path=" + file.getPath());
                System.out.println("absolutepath=" + file.getAbsolutePath());
                System.out.println("name=" + file.getName());
            } else {
                System.out.println("=========================解析Json=========================");
                String[] fileList = file.list();
                if (fileList == null) { // I/O error or permission problem: nothing to walk
                    return true;
                }
                for (String name : fileList) {
                    File readfile = new File(filepath + File.separator + name);
                    if (readfile.isDirectory()) {
                        readFile(readfile.getPath()); // recurse into sub-folders
                    } else {
                        json(readfile.getAbsolutePath()); // parse this JSON file
                    }
                }
            }
        } catch (FileNotFoundException e) {
            System.out.println("readfile() Exception:" + e.getMessage());
        }
        return true;
    }

    /**
     * Parses one JSON file and appends one UPDATE statement per entry of its
     * "data" array to sqlList.
     *
     * FIX: the original used {@code sqlList.add(i, sql)} — insertion at index
     * i, which scrambles ordering once more than one file is parsed — and
     * built an unused HashMap per entry. NPE guards added for an unreadable
     * file and values are appended in file order.
     *
     * SECURITY NOTE(review): values are concatenated straight into the SQL
     * text; if the JSON input is not fully trusted this is injectable —
     * switch to PreparedStatement placeholders.
     */
    public static void json(String fileName) {
        String s = readJsonFile(fileName);
        System.out.println(fileName);
        if (s == null) { // unreadable file: skip instead of NPE in parseObject
            return;
        }
        JSONObject jobj = JSON.parseObject(s);
        // getJSONArray must be given the exact array name present in the file,
        // otherwise it returns null and iterating would NPE.
        JSONArray movies = jobj.getJSONArray("data");
        if (movies == null) {
            return;
        }
        for (int i = 0; i < movies.size(); i++) {
            JSONObject key = (JSONObject) movies.get(i);
            String number = (String) key.get("number");
            String risk = updateRisk((String) key.get("info"));
            String sql = "UPDATE tb_shop_phone SET risk='" + risk + "',update_time ='"
                    + df.format(new Date()) + "' WHERE phone='" + number + "'";
            sqlList.add(sql); // append, preserving file order
        }
    }

    /**
     * Maps an "info" description to a risk level:
     * "1" = low risk, "2" = high risk (default), "3" = premium-number restriction.
     */
    public static String updateRisk(String info) {
        String risk = "2"; // default: high risk
        String reg = "[^\u4e00-\u9fa5]";
        info = info.replaceAll(reg, ""); // keep only the CJK characters
        if ("靓号限制".equals(info)) {
            risk = "3";
        } else if ("风险较小号码".equals(info)) {
            risk = "1";
        }
        return risk;
    }
}
3、异步处理定时任务:MsgInfo.java
/**
 * Timer task that drains BatchUpdateData.queue on every tick, persisting each
 * failed-batch entry through BatchUpdateData.logs(). Once the worker pool has
 * finished (DateThreadisRes is true) it reports the remaining queue size so
 * the operator knows when the JVM can be stopped safely.
 */
public class MsgInfo extends TimerTask {
    @Override
    public void run() {
        ConcurrentLinkedQueue<String> pending = BatchUpdateData.queue;
        try {
            // Drain everything currently queued; this task is the only consumer.
            while (!pending.isEmpty()) {
                String logInfo = pending.poll();
                // TODO persist to database, file, or excel export instead
                BatchUpdateData.logs(logInfo);
            }
        } catch (Exception e) {
            System.err.println("save messagelogs error:" + e);
        }
        // After the pool is done, tell the operator how much is left to flush.
        if (BatchUpdateData.DateThreadisRes) {
            System.out.println("当前队列剩余数量:" + pending.size() + ",异步保存完毕,可手动关闭主线程!");
        }
    }
}
结果:
批量修改110万条数据,耗时:9623秒
线程数:4
单次批量导入:2000
原文借鉴:
https://blog.csdn.net/qq_33522040/article/details/90642527