Mysql亿级数据清洗迁移

Mysql亿级数据清洗迁移

本文使用代码做数据清洗和迁移

需求:清洗旧表数据并添加新业务数据至新表
大致步骤
1计算当前数据库写QPS 若每秒内可处理200条数据则size设置为200
2定义定时任务管理每秒调一次 每次执行200条数据
3引入redis根据主键ID排列顺序,存入当前已处理的最后一条数据的主键ID
4从redis中获取主键ID
5查询从主键ID开始偏移量为size的数据
6获取旧数据添加业务数据并插入新表

    /**
    * type :任务类型
    * size :一次迁移数量
    */
    public void transfer(Integer type, Integer size) {
    
        RLock rLock = redissonClient.getLock("realname_trasfer:" + type);
        if (finished) {
            return;
        }
        if (!rLock.tryLock()) {
            log.warn("realname task busy");
        }
        try {
        	//任务处理
            doTransfer(type, size);
        } finally {
            rLock.unlock();
        }
    }

    private void doTransfer(Integer type, Integer size) {
        long start = System.currentTimeMillis();
        //获取当前以处理的数据主键
        String skipId = getSkipId(type);
        Boolean hasError = false;
        if (skipId.indexOf(":") >= 0) {
            hasError = true;
        }

        if (hasError) {
            return;
        }
		//查询以处理的数据节点后一批需要处理的数据
        List<OsvServiceEntity> list = osvServiceDAO.getListBySkipId(getSkipId(type), type, size);
        if (CollectionUtils.isEmpty(list)) {
            log.info("transfer finished skipId:{}", skipId);
            finished = true;
            return;
        }

        RAtomicLong scanTotal = redissonClient.getAtomicLong("realname_trasfer_scan:" + type);
        scanTotal.expire(30, TimeUnit.DAYS);

        String nextSkipId = list.get(list.size() - 1).getId();
        //寻找相应的处理器
        Map<Integer, List<OsvServiceEntity>> typeMapList = list.stream().collect(Collectors.groupingBy(item -> item.getType()));
        try {
            for (Map.Entry<Integer, List<OsvServiceEntity>> entry : typeMapList.entrySet()) {
                RealNameHandler handler = ApplicationContextUtil.getBean("REALNAME_HANDLER_" + entry.getKey(), RealNameHandler.class);
                if (handler == null) {
                    throw new BaseBizRuntimeException("找不到指定类型实名处理器,type:" + type);
                }
                handler.handle(entry.getValue().stream().map(item -> item.getId()).collect(Collectors.toList()));
            }

            updateSkipId(type, nextSkipId);
            scanTotal.addAndGet(list.size());
            scanTotal.expire(30, TimeUnit.DAYS);
            long end = System.currentTimeMillis();
            log.info("realname transfer finished,type:{},skipId:{},size:{},nextSkipId:{},scanTotal:{},cost:{}", type, skipId, size, nextSkipId, scanTotal.get(), end - start);
        } catch (Exception e) {
            log.error("迁移发生异常,任务中断,skipId:{},message:{}", skipId, e.getMessage(), e);
            nextSkipId = skipId + ":error";
            clear(type, skipId);
            updateSkipId(type, nextSkipId);
        }
    }
   private String getSkipId(Integer type) {
        String skipId = redisTemplate.opsForValue().get("realname_trasfer_skipId:" + type);
        if (StringUtils.isBlank(skipId)) {
            return "0";
        }
        return skipId;
    }

DAO层

  public List<OsvServiceEntity> getListBySkipId(String skipId, Integer type, Integer size) {
        LambdaQueryWrapper<OsvServiceEntity> queryWrapper = new QueryWrapper<OsvServiceEntity>().lambda();
        queryWrapper.gt(OsvServiceEntity::getId, skipId);
        if (type != null && type >0) {
            queryWrapper.eq(OsvServiceEntity::getType, type);
        }
        queryWrapper.eq(OsvServiceEntity::getServiceStatus, 3);
        queryWrapper.orderByAsc(OsvServiceEntity::getId);
        queryWrapper.last("limit " + size);
        return baseMapper.selectList(queryWrapper);
    }
//更新redis缓存
  private void updateSkipId(Integer type, String skipId) {
        redisTemplate.opsForValue().set("realname_trasfer_skipId:" + type, skipId, 30, TimeUnit.DAYS);
    }
//处理器接口
public interface RealNameHandler {

    void handle(List<String> serviceIds);
}
@Service("REALNAME_HANDLER_1")
@Slf4j
public class RealNameHandlerType1 implements RealNameHandler {

    @Resource
    private HBaseDAO hBaseDAO;
    @Resource
    private DataCleanDAO dataCleanDAO;

    @Override
    public void handle(List<String> serviceIds) {
		//TODO something
	}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值