将一个用','号连接的字段拆分为多行。
1.SQL实现
使用sql进行分割操作的话,在数据量大的情况下,速度还是比较慢的。
SELECT
ID,
NAME,
CASE
WHEN INSTR(lala/*需要拆分为多行的列*/, ',') <= 0 THEN lala/*需要拆分为多行的列*/
ELSE
CASE
WHEN levels.LVL = 1 THEN SUBSTR(lala/*需要拆分为多行的列*/,0,INSTR(lala/*需要拆分为多行的列*/, ',', 1, levels.lvl)-1)
WHEN a.cnt = levels.lvl AND a.CNT > 1 THEN SUBSTR(lala/*需要拆分为多行的列*/,INSTR(lala/*需要拆分为多行的列*/, ',', 1, levels.lvl-1)+1)
ELSE SUBSTR(lala/*需要拆分为多行的列*/, INSTR(lala/*需要拆分为多行的列*/, ',', 1, levels.lvl-1) + 1, INSTR(lala/*需要拆分为多行的列*/, ',', 1, levels.lvl)-INSTR(lala/*需要拆分为多行的列*/, ',', 1, levels.lvl-1)-1)
END
END lala/*需要拆分为多行的列*/
FROM
(SELECT POSITION_ID,DUTY_DESCRIPTION,IS_DELETE,lala/*需要拆分为多行的列*/,length(lala/*需要拆分为多行的列*/) - nvl(length(REPLACE(lala/*需要拆分为多行的列*/, ',')), 0) + 1 AS cnt
FROM table/*需要拆分的表*/) a,
(SELECT rownum AS lvl FROM (SELECT MAX(length(lala/*需要拆分为多行的列*/ || ',') - nvl(length(REPLACE(lala/*需要拆分为多行的列*/, ',')), 0)) max_len
FROM table/*需要拆分的表*/) CONNECT BY LEVEL <= max_len) levels
WHERE levels.lvl <= a.cnt
ORDER BY ID,NAME
2. JAVA实现
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class DataProcessor {
private static final String cname = "dm.jdbc.driver.DmDriver";
private static final int BATCH_SIZE = 5000; // Number of records to process in each batch
private static final int NUM_THREADS = 4; // Number of threads to use for parallel processing
public static void main(String[] args) {
System.out.println("开始执行---------------");
// Establish database connection
Connection connection = null;
try {
Class.forName(cname);
connection = DriverManager.getConnection("数据库url", "数据库username", "数据库password");
System.out.println("连接数据库成功");
} catch (SQLException e) {
e.printStackTrace();
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// Fetch data from the source table
List<DataRecord> records = fetchData(connection);
// Split data into batches
List<List<DataRecord>> batches = splitIntoBatches(records, BATCH_SIZE);
// Create thread pool for parallel processing
ExecutorService executor = Executors.newFixedThreadPool(NUM_THREADS);
// Process each batch concurrently
for (List<DataRecord> batch : batches) {
Runnable task = new ProcessBatchTask(batch, connection);
executor.execute(task);
}
// Shut down the executor after all tasks are complete
executor.shutdown();
}
private static List<DataRecord> fetchData(Connection connection) {
List<DataRecord> records = new ArrayList<DataRecord>();
try {
Statement statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery("select 字段1,需要分割的字段2 from 数据库表");
while (resultSet.next()) {
String indexOne = resultSet.getString("字段1");
String indexTwo = resultSet.getString("需要分割的字段2");
records.add(new DataRecord(indexOne, indexTwo));
}
resultSet.close();
statement.close();
} catch (SQLException e) {
e.printStackTrace();
}
return records;
}
private static List<List<DataRecord>> splitIntoBatches(List<DataRecord> records, int batchSize) {
List<List<DataRecord>> batches = new ArrayList();
int numRecords = records.size();
int numBatches = (int) Math.ceil((double) numRecords / batchSize);
System.out.println("数据行数:"+numRecords);
for (int i = 0; i < numBatches; i++) {
int fromIndex = i * batchSize;
int toIndex = Math.min((i + 1) * batchSize, numRecords);
List<DataRecord> batch = records.subList(fromIndex, toIndex);
batches.add(batch);
}
return batches;
}
private static void insertData(Connection connection, List<DataRecord> records) {
try {
PreparedStatement statement = connection.prepareStatement("INSERT INTO 分割后数据存储表 (字段1, 需要分割的字段2) VALUES (?, ?)");
for (DataRecord record : records) {
statement.setString(1, record.getIndexOne());
if(record!=null && record.getIndexTwo()!=null){
String[] indexTwos = record.getIndexTwo().split(",");
for (String code : indexTwos) {
statement.setString(2, code);
statement.addBatch();
}
}
}
statement.executeBatch();
statement.close();
System.out.println("结束执行------------");
} catch (SQLException e) {
e.printStackTrace();
}
}
private static class ProcessBatchTask implements Runnable {
private List<DataRecord> batch;
private Connection connection;
public ProcessBatchTask(List<DataRecord> batch, Connection connection) {
this.batch = batch;
this.connection = connection;
}
public void run() {
insertData(connection, batch);
}
}
private static class DataRecord {
private String indexOne;
private String indexTwo;
public DataRecord(String indexOne, String indexTwo) {
this.indexOne = indexOne;
this.indexTwo = indexTwo;
}
public String getIndexOne() {
return indexOne;
}
public String getIndexTwo() {
return indexTwo;
}
}
}