Mysql亿级数据清洗迁移
本文使用代码做数据清洗和迁移
需求:清洗旧表数据并添加新业务数据至新表
大致步骤
1计算当前数据库写QPS 若每秒内可处理200条数据则size设置为200
2定义定时任务管理每秒调一次 每次执行200条数据
3引入redis根据主键ID排列顺序,存入当前已处理的最后一条数据的主键ID
4从redis中获取主键ID
5查询从主键ID开始偏移量为size的数据
6获取旧数据添加业务数据并插入新表
/**
* type :任务类型
* size :一次迁移数量
*/
public void transfer(Integer type, Integer size) {
RLock rLock = redissonClient.getLock("realname_trasfer:" + type);
if (finished) {
return;
}
if (!rLock.tryLock()) {
log.warn("realname task busy");
}
try {
//任务处理
doTransfer(type, size);
} finally {
rLock.unlock();
}
}
private void doTransfer(Integer type, Integer size) {
long start = System.currentTimeMillis();
//获取当前以处理的数据主键
String skipId = getSkipId(type);
Boolean hasError = false;
if (skipId.indexOf(":") >= 0) {
hasError = true;
}
if (hasError) {
return;
}
//查询以处理的数据节点后一批需要处理的数据
List<OsvServiceEntity> list = osvServiceDAO.getListBySkipId(getSkipId(type), type, size);
if (CollectionUtils.isEmpty(list)) {
log.info("transfer finished skipId:{}", skipId);
finished = true;
return;
}
RAtomicLong scanTotal = redissonClient.getAtomicLong("realname_trasfer_scan:" + type);
scanTotal.expire(30, TimeUnit.DAYS);
String nextSkipId = list.get(list.size() - 1).getId();
//寻找相应的处理器
Map<Integer, List<OsvServiceEntity>> typeMapList = list.stream().collect(Collectors.groupingBy(item -> item.getType()));
try {
for (Map.Entry<Integer, List<OsvServiceEntity>> entry : typeMapList.entrySet()) {
RealNameHandler handler = ApplicationContextUtil.getBean("REALNAME_HANDLER_" + entry.getKey(), RealNameHandler.class);
if (handler == null) {
throw new BaseBizRuntimeException("找不到指定类型实名处理器,type:" + type);
}
handler.handle(entry.getValue().stream().map(item -> item.getId()).collect(Collectors.toList()));
}
updateSkipId(type, nextSkipId);
scanTotal.addAndGet(list.size());
scanTotal.expire(30, TimeUnit.DAYS);
long end = System.currentTimeMillis();
log.info("realname transfer finished,type:{},skipId:{},size:{},nextSkipId:{},scanTotal:{},cost:{}", type, skipId, size, nextSkipId, scanTotal.get(), end - start);
} catch (Exception e) {
log.error("迁移发生异常,任务中断,skipId:{},message:{}", skipId, e.getMessage(), e);
nextSkipId = skipId + ":error";
clear(type, skipId);
updateSkipId(type, nextSkipId);
}
}
private String getSkipId(Integer type) {
String skipId = redisTemplate.opsForValue().get("realname_trasfer_skipId:" + type);
if (StringUtils.isBlank(skipId)) {
return "0";
}
return skipId;
}
DAO层
public List<OsvServiceEntity> getListBySkipId(String skipId, Integer type, Integer size) {
LambdaQueryWrapper<OsvServiceEntity> queryWrapper = new QueryWrapper<OsvServiceEntity>().lambda();
queryWrapper.gt(OsvServiceEntity::getId, skipId);
if (type != null && type >0) {
queryWrapper.eq(OsvServiceEntity::getType, type);
}
queryWrapper.eq(OsvServiceEntity::getServiceStatus, 3);
queryWrapper.orderByAsc(OsvServiceEntity::getId);
queryWrapper.last("limit " + size);
return baseMapper.selectList(queryWrapper);
}
//更新redis缓存
private void updateSkipId(Integer type, String skipId) {
redisTemplate.opsForValue().set("realname_trasfer_skipId:" + type, skipId, 30, TimeUnit.DAYS);
}
//处理器接口
public interface RealNameHandler {
void handle(List<String> serviceIds);
}
@Service("REALNAME_HANDLER_1")
@Slf4j
public class RealNameHandlerType1 implements RealNameHandler {
@Resource
private HBaseDAO hBaseDAO;
@Resource
private DataCleanDAO dataCleanDAO;
@Override
public void handle(List<String> serviceIds) {
//TODO something
}