学习目标:
Java批量数据清洗
学习内容:
Java批量数据清洗的方案
学习产出:
1.Service层:
/**
 * CRM 2.4 product overview: cleans legacy data by syncing each product's total sales volume.
 *
 * @return a map of run statistics; the implementation shown in this file fills it with the
 *         number of rows loaded, the per-batch processed counts and the elapsed milliseconds
 */
Map clearDataoOrderCount();
2.ServiceImpl层:
/**
 * Cleans legacy product-summary rows in batches and reports run statistics.
 *
 * <p>Loads every row via {@code crmProductSummaryDao.listCrmProductSummary()}, splits the
 * result into slices of at most 1000 rows, passes each slice through
 * {@link #clearData(List)} and persists the rows it returns with {@code updateBatchById}.
 * A {@code null} result from the DAO is treated as "nothing to clean" instead of failing
 * with a {@code NullPointerException}.
 *
 * @return a map with three entries: the number of rows loaded, the list of per-batch row
 *         counts returned by {@code clearData}, and the elapsed time in milliseconds
 */
@Override
@Transactional(rollbackFor = Exception.class)
public Map clearDataoOrderCount() {
    // Maximum number of rows cleaned and written per batch.
    final int batchSize = 1000;
    long createTimeMillis = System.currentTimeMillis();
    HashMap<String, Object> map = new HashMap<>();
    List<Integer> countList = Lists.newArrayList();
    // 1. Load all rows.
    List<CrmProductSummary> summaryDaoPage = crmProductSummaryDao.listCrmProductSummary();
    log.info("GoodsOverviewServiceImpl->clearDataoOrderCount->全部数据1->summaryDaoPage:{}", JSON.toJSONString(summaryDaoPage));
    // Resolve the size once, null-safely, before anything dereferences the list.
    int total = summaryDaoPage == null ? 0 : summaryDaoPage.size();
    map.put("countSum->准备清洗总数量", total);
    log.info("GoodsOverviewServiceImpl->clearDataoOrderCount->countSum->准备清洗总数量->size:{}", total);
    // 2. Clean in batches; the loop body never runs when there are no rows.
    for (int start = 0; start < total; start += batchSize) {
        int end = start + Math.min(batchSize, total - start);
        List<CrmProductSummary> cleaned = this.clearData(summaryDaoPage.subList(start, end));
        // The cleaned rows are only dumped to the log for the final batch.
        if (end == total) {
            log.info("GoodsOverviewServiceImpl->clearDataoOrderCount->全部数据2->list:{}", JSON.toJSONString(cleaned));
        }
        // Persist the batch; clearData returns an empty list when there is nothing to write.
        if (!CollectionUtil.isEmpty(cleaned)) {
            this.updateBatchById(cleaned);
        }
        countList.add(cleaned.size());
    }
    map.put("listCountClear->清洗完成总数量", countList);
    long endTimeMillis = System.currentTimeMillis();
    map.put("清洗数据耗时:毫秒", endTimeMillis - createTimeMillis);
    // Log after the map is fully populated so the completion entry shows the actual results.
    log.info("GoodsOverviewServiceImpl->clearDataoOrderCount->清洗完成->map:{}", JSON.toJSONString(map));
    return map;
}
/**
 * Refreshes the order count of the given summaries from the remote total-sales lookup.
 *
 * <p>Collects the distinct product ids of {@code listSummary}, requests their total sales
 * through {@link #jsonCleanProductTotalSales(ProductStringListRequest)} and copies
 * {@code totalSales} onto every summary whose product id appears in the response.
 * Summaries without a match are still included in the result, unchanged.
 *
 * <p>NOTE(review): this method is invoked through {@code this} by
 * {@code clearDataoOrderCount}; under Spring's default proxy-based transaction handling the
 * annotation here would then not open a transaction of its own. Confirm against the
 * project's configuration.
 *
 * @param listSummary the summaries of one batch; may be {@code null} or empty
 * @return an empty list when the input is empty or the remote lookup returned nothing,
 *         otherwise every input summary in its original order
 */
@Transactional(rollbackFor = Exception.class)
public List<CrmProductSummary> clearData(List<CrmProductSummary> listSummary) {
    List<CrmProductSummary> list = Lists.newArrayList();
    // 1. Nothing to do for an empty batch.
    if (CollectionUtil.isEmpty(listSummary)) {
        return list;
    }
    // 2. Collect the product ids of the batch.
    List<String> productIdList = listSummary.stream().map(CrmProductSummary::getProductId).collect(Collectors.toList());
    log.info("GoodsOverviewServiceImpl->clearData->根据集合获取商品id->productIdList:{}", JSON.toJSONString(productIdList));
    List<String> dataList = productIdList.stream().distinct().collect(Collectors.toList());
    log.info("GoodsOverviewServiceImpl->clearData->根据集合获取商品id去重之后->dataList:{}", JSON.toJSONString(dataList));
    // 3. Build the remote request from the distinct ids.
    ProductStringListRequest productStringListRequest = new ProductStringListRequest().setIds(dataList);
    // 4. Fetch the total sales remotely.
    List<CleanProductTotalSalesVo> totalSalesVoList = this.jsonCleanProductTotalSales(productStringListRequest);
    log.info("GoodsOverviewServiceImpl->clearData->crm2.4商品概况清洗同步老数据商品总销量->totalSalesVoList:{}", JSON.toJSONString(totalSalesVoList));
    if (CollectionUtil.isEmpty(totalSalesVoList)) {
        return list;
    }
    // 5. Index the response by product id. The merge function keeps the first entry when the
    //    response repeats a product id; without it toMap throws IllegalStateException.
    Map<String, CleanProductTotalSalesVo> salesByProductId = totalSalesVoList.stream()
            .collect(Collectors.toMap(CleanProductTotalSalesVo::getProductId, vo -> vo, (first, duplicate) -> first));
    for (CrmProductSummary summary : listSummary) {
        // 6. Look up the remote entry for this row's product id.
        log.info("GoodsOverviewServiceImpl->clearData->summary->crm2.4商品概况清洗同步老数据商品总销量->productId:{}", JSON.toJSONString(summary.getProductId()));
        CleanProductTotalSalesVo totalSalesVo = salesByProductId.get(summary.getProductId());
        log.info("GoodsOverviewServiceImpl->clearData-totalSalesVo>->crm2.4商品概况清洗同步老数据商品总销量->totalSalesVo:{}", JSON.toJSONString(totalSalesVo));
        // 7. Copy the total sales when the product was found remotely.
        if (totalSalesVo != null) {
            summary.setOrderCount(totalSalesVo.getTotalSales());
        }
        // 8. Matched or not, the row is part of the result.
        list.add(summary);
    }
    return list;
}
/**
 * Calls the remote total-sales endpoint and converts its JSON reply into value objects.
 *
 * <p>The reply is parsed as a JSON object. Its {@code data} field is converted only when
 * the {@code code} field renders as {@code "200"}; a reply with no {@code code} field is
 * treated as unsuccessful rather than causing a {@code NullPointerException}.
 *
 * @param productStringListRequest the request body posted to the endpoint
 * @return the converted entries, or an empty list when the reply is blank, unsuccessful or
 *         has no {@code data} field
 * @throws BusinessException when a successful reply carries a {@code data} field that
 *         converts to an empty list
 */
@Transactional(rollbackFor = Exception.class)
public List<CleanProductTotalSalesVo> jsonCleanProductTotalSales(ProductStringListRequest productStringListRequest) {
    List<CleanProductTotalSalesVo> cleanProductTotalSalesVoList = Lists.newArrayList();
    // 1. Post the request to the remote endpoint.
    String cleanProductTotalSalesJSON = okHttpCli.doPostJSON(urlConfig.getCleanProductTotalSales(), productStringListRequest);
    log.info("GoodsOverviewServiceImpl->jsonCleanProductTotalSales->远程获取清洗商品概况的商品总销量接口->cleanProductTotalSalesJSON:{}", JSON.toJSONString(cleanProductTotalSalesJSON));
    // 2. Parse the reply, if there is one.
    if (!StringUtils.isEmpty(cleanProductTotalSalesJSON)) {
        Map<?, ?> response = JSON.parseObject(cleanProductTotalSalesJSON, HashMap.class);
        log.info("GoodsOverviewServiceImpl->jsonCleanProductTotalSales->远程获取orser-pay清洗商品概况的商品总销量接口业务处理->hashMap:{}", JSON.toJSONString(response));
        // String.valueOf turns a missing code into "null", so the comparison simply fails.
        boolean success = !CollectionUtil.isEmpty(response) && "200".equals(String.valueOf(response.get("code")));
        Object data = success ? response.get("data") : null;
        // 3. Convert the payload of a successful reply.
        if (data != null) {
            cleanProductTotalSalesVoList = JsonUtil.getList(data.toString(), CleanProductTotalSalesVo.class);
            log.info("GoodsOverviewServiceImpl->jsonCleanProductTotalSales->远程获取orser-pay清洗商品概况的商品总销量接口data转map->cleanProductTotalSalesVoList:{}", JSON.toJSONString(cleanProductTotalSalesVoList));
            if (CollectionUtil.isEmpty(cleanProductTotalSalesVoList)) {
                throw new BusinessException(EnumResultCode.ERROR.getCode(), "远程获取orser-pay清洗商品概况的商品总销量接口为空");
            }
        }
    }
    log.info("GoodsOverviewServiceImpl->jsonCleanProductTotalSales->远程获取orser-pay清洗商品概况的商品总销量接口响应输出->cleanProductTotalSalesVoList:{}", JSON.toJSONString(cleanProductTotalSalesVoList));
    return cleanProductTotalSalesVoList;
}
3.Controller层:
/**
 * Triggers the data-cleaning run and returns its statistics.
 *
 * <p>Delegates to {@code goodsOverviewService.clearDataoOrderCount()}, logs the resulting
 * map as JSON and wraps it in a success response.
 *
 * @return a success {@code ResponseResult} carrying the map produced by the service
 */
@PostMapping("/clearDataoOrderCount")
public ResponseResult<Map> clearDataoOrderCount(){
    final Map cleaningResult = goodsOverviewService.clearDataoOrderCount();
    final String cleaningResultJson = JSON.toJSONString(cleaningResult);
    log.info("GoodsOverviewController->clearDataoOrderCount->清洗响应->map:{}", cleaningResultJson);
    return ResponseResult.buildSuccess(cleaningResult);
}