【大厂技术内幕】字节跳动原来是这么做数据迁移的!,java工程师面试简历模板

/**

  • 更新同步状态

  • @param apArticle

*/

void updateSyncStatus(ApArticle apArticle);

}

ApArticleServiceImpl

对ApArticleService相关的操作

代码位置:com.heima.migration.service.impl.ApArticleServiceImpl

@Log4j2

@Service

public class ApArticleServiceImpl implements ApArticleService {

@Autowired

private ApArticleMapper apArticleMapper;

public ApArticle getById(Long id) {

return apArticleMapper.selectById(id);

}

/**

  • 获取未同步的数据

  • @return

*/

public List getUnsyncApArticleList() {

ApArticle apArticleQuery = new ApArticle();

apArticleQuery.setSyncStatus(false);

return apArticleMapper.selectList(apArticleQuery);

}

/**

  • 更新数据同步状态

  • @param apArticle

*/

public void updateSyncStatus(ApArticle apArticle) {

log.info(“开始更新数据同步状态,apArticle:{}”, apArticle);

if (null != apArticle) {

apArticle.setSyncStatus(true);

apArticleMapper.updateSyncStatus(apArticle);

}

}

}

7.5 文章作者接口


7.5.1 mapper定义

ApAuthorMapper

/**
 * Loads the authors whose ids are contained in {@code ids}.
 *
 * @param ids author ids to look up
 * @return the matching authors
 */
List<ApAuthor> selectByIds(List<Integer> ids);

ApAuthorMapper.xml

select * from ap_author

where id in

<foreach collection="list" item="item" open="(" separator="," close=")">
    #{item}
</foreach>

7.5.2 service

对ApAuthor操作的Service

接口位置:com.heima.migration.service.ApAuthorService

/**
 * Read operations on {@code ApAuthor}.
 */
public interface ApAuthorService {

    /**
     * Loads several authors at once.
     *
     * @param ids author ids
     * @return the authors found for those ids
     */
    List<ApAuthor> queryByIds(List<Integer> ids);

    /**
     * Loads one author.
     *
     * @param id author id
     * @return the author for that id
     */
    ApAuthor getById(Long id);
}

ApAuthorServiceImpl

对ApAuthor相关的操作

代码位置:com.heima.migration.service.impl.ApAuthorServiceImpl

/**
 * {@code ApAuthorService} implementation that delegates to {@code ApAuthorMapper}.
 */
@Service
public class ApAuthorServiceImpl implements ApAuthorService {

    @Autowired
    private ApAuthorMapper apAuthorMapper;

    /**
     * Loads several authors at once.
     *
     * @param ids author ids; may be {@code null} or empty
     * @return the mapper result, or an empty list when there are no ids to look up
     */
    @Override
    public List<ApAuthor> queryByIds(List<Integer> ids) {
        // The mapper statement is "where id in (...)"; with no ids the clause would
        // have no values, so skip the query entirely.
        if (null == ids || ids.isEmpty()) {
            return java.util.Collections.emptyList();
        }
        return apAuthorMapper.selectByIds(ids);
    }

    /**
     * Loads one author.
     *
     * @param id author id; it is narrowed with {@code intValue()} before the lookup
     * @return the mapper result, or {@code null} when {@code id} is {@code null}
     */
    @Override
    public ApAuthor getById(Long id) {
        if (null == id) {
            return null;
        }
        return apAuthorMapper.selectById(id.intValue());
    }
}

7.6 综合迁移接口


ArticleQuantityService

操作ArticleQuantity对象的Service。ArticleQuantity对象封装了文章相关的数据。

接口位置:com.heima.migration.service.ArticleQuantityService

/**
 * Operations on {@code ArticleQuantity}, the object that bundles the data related to
 * one article.
 */
public interface ArticleQuantityService {

    /**
     * Gets the list of {@code ArticleQuantity} objects.
     *
     * @return the list
     */
    List<ArticleQuantity> getArticleQuantityList();

    /**
     * Gets the {@code ArticleQuantity} for an article id.
     *
     * @param id article id
     * @return the matching {@code ArticleQuantity}
     */
    ArticleQuantity getArticleQuantityByArticleId(Long id);

    /**
     * Gets the {@code ArticleQuantity} for an article id from HBase.
     *
     * @param id article id
     * @return the matching {@code ArticleQuantity}
     */
    ArticleQuantity getArticleQuantityByArticleIdForHbase(Long id);

    /**
     * Synchronizes data from the database to HBase.
     */
    void dbToHbase();

    /**
     * Synchronizes the database data of one article to HBase.
     *
     * @param articleId id of the article to synchronize
     */
    void dbToHbase(Integer articleId);
}

ArticleQuantityServiceImpl

对ArticleQuantity的相关操作

代码位置:com.heima.migration.service.impl.ArticleQuantityServiceImpl

/**

  • 查询未同步的数据,并封装成ArticleQuantity 对象

*/

@Service

@Log4j2

public class ArticleQuantityServiceImpl implements ArticleQuantityService {

@Autowired

private ApArticleContenService apArticleContenService;

@Autowired

private ApArticleConfigService apArticleConfigService;

@Autowired

private ApAuthorService apAuthorService;

@Autowired

private HBaseStorageClient hBaseStorageClient;

@Autowired

private ApArticleService apArticleService;

/**

  • 查询位同步数据的列表

  • @return

*/

public List getArticleQuantityList() {

log.info(“生成ArticleQuantity列表”);

//查询未同步的庶数据

List apArticleList = apArticleService.getUnsyncApArticleList();

if (apArticleList.isEmpty()) {

return null;

}

//获取ArticleId 的list

List apArticleIdList = apArticleList.stream().map(apArticle -> String.valueOf(apArticle.getId())).collect(Collectors.toList());

//获取AuthorId 的 list

List apAuthorIdList = apArticleList.stream().map(apAuthor -> apAuthor.getAuthorId() == null ? null : apAuthor.getAuthorId().intValue()).filter(x -> x != null).collect(Collectors.toList());

//根据apArticleIdList 批量查询出内容列表

List apArticleContentList = apArticleContenService.queryByArticleIds(apArticleIdList);

//根据apArticleIdList 批量查询出配置列表

List apArticleConfigList = apArticleConfigService.queryByArticleIds(apArticleIdList);

//根据apAuthorIdList 批量查询出作者列

List apAuthorList = apAuthorService.queryByIds(apAuthorIdList);

//将不同的对象转换为 ArticleQuantity 对象

List articleQuantityList = apArticleList.stream().map(apArticle -> {

return new ArticleQuantity() {{

//设置apArticle 对象

setApArticle(apArticle);

// 根据apArticle.getId() 过滤出符合要求的 ApArticleContent 对象

List apArticleContents = apArticleContentList.stream().filter(x -> x.getArticleId().equals(apArticle.getId())).collect(Collectors.toList());

if (null != apArticleContents && !apArticleContents.isEmpty()) {

setApArticleContent(apArticleContents.get(0));

}

// 根据 apArticle.getId 过滤出 ApArticleConfig 对象

List apArticleConfigs = apArticleConfigList.stream().filter(x -> x.getArticleId().equals(apArticle.getId())).collect(Collectors.toList());

if (null != apArticleConfigs && !apArticleConfigs.isEmpty()) {

setApArticleConfig(apArticleConfigs.get(0));

}

// 根据 apArticle.getAuthorId().intValue() 过滤出 ApAuthor 对象

List apAuthors = apAuthorList.stream().filter(x -> x.getId().equals(apArticle.getAuthorId().intValue())).collect(Collectors.toList());

if (null != apAuthors && !apAuthors.isEmpty()) {

setApAuthor(apAuthors.get(0));

}

//设置回调方法 用户方法的回调 用于修改同步状态 插入Hbase 成功后同步状态改为已同步

setHBaseInvok(new ArticleHBaseInvok(apArticle, (x) -> apArticleService.updateSyncStatus(x)));

}};

}).collect(Collectors.toList());

if (null != articleQuantityList && !articleQuantityList.isEmpty()) {

log.info(“生成ArticleQuantity列表完成,size:{}”, articleQuantityList.size());

} else {

log.info(“生成ArticleQuantity列表完成,size:{}”, 0);

}

return articleQuantityList;

}

public ArticleQuantity getArticleQuantityByArticleId(Long id) {

if (null == id) {

return null;

}

ArticleQuantity articleQuantity = null;

ApArticle apArticle = apArticleService.getById(id);

if (null != apArticle) {

articleQuantity = new ArticleQuantity();

articleQuantity.setApArticle(apArticle);

ApArticleContent apArticleContent = apArticleContenService.getByArticleIds(id.intValue());

articleQuantity.setApArticleContent(apArticleContent);

ApArticleConfig apArticleConfig = apArticleConfigService.getByArticleId(id.intValue());

articleQuantity.setApArticleConfig(apArticleConfig);

ApAuthor apAuthor = apAuthorService.getById(apArticle.getAuthorId());

articleQuantity.setApAuthor(apAuthor);

}

return articleQuantity;

}

public ArticleQuantity getArticleQuantityByArticleIdForHbase(Long id) {

if (null == id) {

return null;

}

ArticleQuantity articleQuantity = null;

List typeList = Arrays.asList(ApArticle.class, ApArticleContent.class, ApArticleConfig.class, ApAuthor.class);

List objectList = hBaseStorageClient.getStorageDataEntityList(HBaseConstants.APARTICLE_QUANTITY_TABLE_NAME, DataConvertUtils.toString(id), typeList);

if (null != objectList && !objectList.isEmpty()) {

articleQuantity = new ArticleQuantity();

for (Object value : objectList) {

if (value instanceof ApArticle) {

articleQuantity.setApArticle((ApArticle) value);

} else if (value instanceof ApArticleContent) {

articleQuantity.setApArticleContent((ApArticleContent) value);

} else if (value instanceof ApArticleConfig) {

articleQuantity.setApArticleConfig((ApArticleConfig) value);

} else if (value instanceof ApAuthor) {

articleQuantity.setApAuthor((ApAuthor) value);

}

}

}

return articleQuantity;

}

/**

  • 数据库到Hbase同步

*/

public void dbToHbase() {

long cutrrentTime = System.currentTimeMillis();

List articleQuantitList = getArticleQuantityList();

if (null != articleQuantitList && !articleQuantitList.isEmpty()) {

log.info(“开始进行定时数据库到HBASE同步,筛选出未同步数据量:{}”, articleQuantitList.size());

if (null != articleQuantitList && !articleQuantitList.isEmpty()) {

List hbaseStorageList = articleQuantitList.stream().map(ArticleQuantity::getHbaseStorage).collect(Collectors.toList());

hBaseStorageClient.addHBaseStorage(HBaseConstants.APARTICLE_QUANTITY_TABLE_NAME, hbaseStorageList);

}

} else {

log.info(“定时数据库到HBASE同步为筛选出数据”);

}

log.info(“定时数据库到HBASE同步结束,耗时:{}”, System.currentTimeMillis() - cutrrentTime);

}

@Override

public void dbToHbase(Integer articleId) {

long cutrrentTime = System.currentTimeMillis();

log.info(“开始进行异步数据库到HBASE同步,articleId:{}”, articleId);

if (null != articleId) {

ArticleQuantity articleQuantity = getArticleQuantityByArticleId(articleId.longValue());

if (null != articleQuantity) {

HBaseStorage hBaseStorage = articleQuantity.getHbaseStorage();

hBaseStorageClient.addHBaseStorage(HBaseConstants.APARTICLE_QUANTITY_TABLE_NAME, hBaseStorage);

}

}

log.info(“异步数据库到HBASE同步结束,articleId:{},耗时:{}”, articleId, System.currentTimeMillis() - cutrrentTime);

}

}

7.7 热点文章接口


ApHotArticleService

对ApHotArticle操作Service

接口位置:com.heima.migration.service.ApHotArticleService

/**
 * Operations on {@code ApHotArticles} (hot article records).
 */
public interface ApHotArticleService {

    /**
     * Queries hot article records using the given object as the query condition.
     *
     * @param apHotArticlesQuery query object
     * @return the matching records
     */
    List<ApHotArticles> selectList(ApHotArticles apHotArticlesQuery);

    /**
     * Inserts a hot article record.
     *
     * @param apHotArticles record to insert
     */
    void insert(ApHotArticles apHotArticles);

    /**
     * Synchronizes the hot data of one article.
     *
     * @param apArticleId article id
     */
    void hotApArticleSync(Integer apArticleId);

    /**
     * Deletes a record by id.
     *
     * @param id id to delete
     */
    void deleteById(Integer id);

    /**
     * Queries the expired records.
     *
     * @return the expired records
     */
    List<ApHotArticles> selectExpireMonth();

    /**
     * Deletes the hot data that belongs to the given record.
     *
     * @param apHotArticle record whose hot data is removed
     */
    void deleteHotData(ApHotArticles apHotArticle);
}

ApHotArticleServiceImpl

对ApHotArticle的相关操作

代码位置:com.heima.migration.service.impl.ApHotArticleServiceImpl

/**

  • 热点数据操作Service 类

*/

@Service

@Log4j2

public class ApHotArticleServiceImpl implements ApHotArticleService {

@Autowired

private ApHotArticlesMapper apHotArticlesMapper;

@Autowired

private MongoTemplate mongoTemplate;

@Autowired

private ArticleQuantityService articleQuantityService;

@Autowired

private HBaseStorageClient hBaseStorageClient;

@Override

public List selectList(ApHotArticles apHotArticlesQuery) {

return apHotArticlesMapper.selectList(apHotArticlesQuery);

}

/**

  • 根据ID删除

  • @param id

*/

@Override

public void deleteById(Integer id) {

log.info(“删除热数据,apArticleId:{}”, id);

apHotArticlesMapper.deleteById(id);

}

/**

  • 查询一个月之前的数据

  • @return

*/

@Override

public List selectExpireMonth() {

return apHotArticlesMapper.selectExpireMonth();

}

/**

  • 删除过去的热数据

  • @param apHotArticle

*/

@Override

public void deleteHotData(ApHotArticles apHotArticle) {

deleteById(apHotArticle.getId());

String rowKey = DataConvertUtils.toString(apHotArticle.getId());

hBaseStorageClient.gethBaseClent().deleteRow(HBaseConstants.APARTICLE_QUANTITY_TABLE_NAME, rowKey);

MongoStorageEntity mongoStorageEntity = mongoTemplate.findById(rowKey, MongoStorageEntity.class);

if (null != mongoStorageEntity) {

mongoTemplate.remove(mongoStorageEntity);

}

}

/**

  • 插入操作

  • @param apHotArticles

*/

@Override

public void insert(ApHotArticles apHotArticles) {

apHotArticlesMapper.insert(apHotArticles);

}

/**

  • 热点数据同步方法

  • @param apArticleId

*/

@Override

public void hotApArticleSync(Integer apArticleId) {

log.info(“开始将热数据同步,apArticleId:{}”, apArticleId);

ArticleQuantity articleQuantity = getHotArticleQuantity(apArticleId);

if (null != articleQuantity) {

//热点数据同步到DB中

hotApArticleToDBSync(articleQuantity);

//热点数据同步到MONGO

hotApArticleMongoSync(articleQuantity);

log.info(“热数据同步完成,apArticleId:{}”, apArticleId);

} else {

log.error(“找不到对应的热数据,apArticleId:{}”, apArticleId);

}

}

/**

  • 获取热数据的ArticleQuantity 对象

  • @param apArticleId

  • @return

*/

private ArticleQuantity getHotArticleQuantity(Integer apArticleId) {

Long id = Long.valueOf(apArticleId);

ArticleQuantity articleQuantity = articleQuantityService.getArticleQuantityByArticleId(id);

if (null == articleQuantity) {

articleQuantity = articleQuantityService.getArticleQuantityByArticleIdForHbase(id);

}

return articleQuantity;

}

/**

  • 热数据 到数据库Mysql的同步

  • @param articleQuantity

*/

public void hotApArticleToDBSync(ArticleQuantity articleQuantity) {

Integer apArticleId = articleQuantity.getApArticleId();

log.info(“开始将热数据从Hbase同步到mysql,apArticleId:{}”, apArticleId);

if (null == apArticleId) {

log.error(“apArticleId不存在无法进行同步”);

return;

}

ApHotArticles apHotArticlesQuery = new ApHotArticles() {{

setArticleId(apArticleId);

}};

List apHotArticlesList = apHotArticlesMapper.selectList(apHotArticlesQuery);

if (null != apHotArticlesList && !apHotArticlesList.isEmpty()) {

log.info(“Mysql数据已同步过不需要再次同步,apArticleId:{}”, apArticleId);

} else {

ApHotArticles apHotArticles = articleQuantity.getApHotArticles();

apHotArticlesMapper.insert(apHotArticles);

}

log.info(“将热数据从Hbase同步到mysql完成,apArticleId:{}”, apArticleId);

}

/**

  • 热数据向从Hbase到Mongodb同步

  • @param articleQuantity

*/

public void hotApArticleMongoSync(ArticleQuantity articleQuantity) {

Integer apArticleId = articleQuantity.getApArticleId();

log.info(“开始将热数据从Hbase同步到MongoDB,apArticleId:{}”, apArticleId);

if (null == apArticleId) {

log.error(“apArticleId不存在无法进行同步”);

return;

}

String rowKeyId = DataConvertUtils.toString(apArticleId);

MongoStorageEntity mongoStorageEntity = mongoTemplate.findById(rowKeyId, MongoStorageEntity.class);

if (null != mongoStorageEntity) {

log.info(“MongoDB数据已同步过不需要再次同步,apArticleId:{}”, apArticleId);

} else {

List storageDataList = articleQuantity.getStorageDataList();

if (null != storageDataList && !storageDataList.isEmpty()) {

mongoStorageEntity = new MongoStorageEntity();

mongoStorageEntity.setDataList(storageDataList);

mongoStorageEntity.setRowKey(rowKeyId);

mongoTemplate.insert(mongoStorageEntity);

}

}

log.info(“将热数据从Hbase同步到MongoDB完成,apArticleId:{}”, apArticleId);

}

}

8 定时同步数据

=======================================================================

8.1 全量数据从mysql同步到HBase


@Component

@DisallowConcurrentExecution

@Log4j2

/**

  • 全量数据从mysql 同步到HBase

*/

public class MigrationDbToHBaseQuartz extends AbstractJob {

@Autowired

private ArticleQuantityService articleQuantityService;

@Override

public String[] triggerCron() {

/**

  • 2019/8/9 10:15:00

  • 2019/8/9 10:20:00

  • 2019/8/9 10:25:00

  • 2019/8/9 10:30:00

  • 2019/8/9 10:35:00

*/

return new String[]{“0 0/5 * * * ?”};

}

@Override

protected void executeInternal(JobExecutionContext jobExecutionContext) throws JobExecutionException {

log.info(“开始进行数据库到HBASE同步任务”);

articleQuantityService.dbToHbase();

log.info(“数据库到HBASE同步任务完成”);

}

}

8.2 定期删除过期的数据


/**

  • 定期删除过期的数据

*/

@Component

@Log4j2

public class MigrationDeleteHotDataQuartz extends AbstractJob {

@Autowired

private ApHotArticleService apHotArticleService;

@Override

public String[] triggerCron() {

/**

  • 2019/8/9 22:30:00

  • 2019/8/10 22:30:00

  • 2019/8/11 22:30:00

  • 2019/8/12 22:30:00

  • 2019/8/13 22:30:00

*/

return new String[]{“0 30 22 * * ?”};

}

@Override

protected void executeInternal(JobExecutionContext jobExecutionContext) throws JobExecutionException {

long cutrrentTime = System.currentTimeMillis();

log.info(“开始删除数据库过期数据”);

deleteExpireHotData();

log.info(“删除数据库过期数据结束,耗时:{}”, System.currentTimeMillis() - cutrrentTime);

}

/**

  • 删除过期的热数据

*/

public void deleteExpireHotData() {

List apHotArticlesList = apHotArticleService.selectExpireMonth();

if (null != apHotArticlesList && !apHotArticlesList.isEmpty()) {

for (ApHotArticles apHotArticle : apHotArticlesList) {

apHotArticleService.deleteHotData(apHotArticle);

}

}

}

}

9 消息接收同步数据

=========================================================================

9.1 文章审核成功同步


9.1.1 消息发送

(1)消息名称定义及消息发送方法声明

maven_test.properties

kafka.topic.article-audit-success=kafka.topic.article.audit.success.sigle.test

kafka.properties

kafka.topic.article-audit-success=${kafka.topic.article-audit-success}

com.heima.common.kafka.KafkaTopicConfig新增属性

/**
 * Audit success: the value used as the topic when the article-audit-success
 * message is sent.
 */

String articleAuditSuccess;

com.heima.common.kafka.KafkaSender

/**
 * Sends the "article audit succeeded" message.
 * The payload is wrapped in an {@code ArticleAuditSuccessMessage} and sent to the
 * audit-success topic with a random UUID string as the second argument.
 *
 * @param message payload to send
 */
public void sendArticleAuditSuccessMessage(ArticleAuditSuccess message) {
    ArticleAuditSuccessMessage envelope = new ArticleAuditSuccessMessage();
    envelope.setData(message);
    String topic = kafkaTopicConfig.getArticleAuditSuccess();
    String messageKey = UUID.randomUUID().toString();
    this.sendMesssage(topic, messageKey, envelope);
}

(2)修改自动审核代码,爬虫和自媒体都要修改

在审核成功后,发送消息

爬虫

// Article audit succeeded: build the payload that carries the article id

ArticleAuditSuccess articleAuditSuccess = new ArticleAuditSuccess();

articleAuditSuccess.setArticleId(apArticle.getId());

小编13年上海交大毕业,曾经在小公司待过,也去过华为、OPPO等大厂,18年进入阿里一直到现在。

深知大多数初中级Java工程师,想要提升技能,往往是自己摸索成长,但自己不成体系的自学效果低效又漫长,而且极易碰到天花板技术停滞不前!

因此收集整理了一份《2024年最新Java开发全套学习资料》送给大家,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友,同时减轻大家的负担。
img
img
img

由于文件比较大,这里只是将部分目录截图出来,每个节点里面都包含大厂面经、学习笔记、源码讲义、实战项目、讲解视频

如果你觉得这些内容对你有帮助,可以添加下面V无偿领取!(备注Java)
img

最后

如果觉得本文对你有帮助的话,不妨给我点个赞,关注一下吧!

kafka.properties

kafka.topic.article-audit-success=${kafka.topic.article-audit-success}

com.heima.common.kafka.KafkaTopicConfig新增属性

/**
 * Audit success: the value used as the topic when the article-audit-success
 * message is sent.
 */

String articleAuditSuccess;

com.heima.common.kafka.KafkaSender

/**
 * Sends the "article audit succeeded" message.
 * The payload is wrapped in an {@code ArticleAuditSuccessMessage} and sent to the
 * audit-success topic with a random UUID string as the second argument.
 *
 * @param message payload to send
 */
public void sendArticleAuditSuccessMessage(ArticleAuditSuccess message) {
    ArticleAuditSuccessMessage envelope = new ArticleAuditSuccessMessage();
    envelope.setData(message);
    String topic = kafkaTopicConfig.getArticleAuditSuccess();
    String messageKey = UUID.randomUUID().toString();
    this.sendMesssage(topic, messageKey, envelope);
}

(2)修改自动审核代码,爬虫和自媒体都要修改

在审核成功后,发送消息

爬虫

// Article audit succeeded: build the payload that carries the article id

ArticleAuditSuccess articleAuditSuccess = new ArticleAuditSuccess();

articleAuditSuccess.setArticleId(apArticle.getId());

小编13年上海交大毕业,曾经在小公司待过,也去过华为、OPPO等大厂,18年进入阿里一直到现在。

深知大多数初中级Java工程师,想要提升技能,往往是自己摸索成长,但自己不成体系的自学效果低效又漫长,而且极易碰到天花板技术停滞不前!

因此收集整理了一份《2024年最新Java开发全套学习资料》送给大家,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友,同时减轻大家的负担。
[外链图片转存中…(img-Cdet4egL-1711163461745)]
[外链图片转存中…(img-bU4mh6I7-1711163461747)]
[外链图片转存中…(img-bREM5mmk-1711163461747)]

由于文件比较大,这里只是将部分目录截图出来,每个节点里面都包含大厂面经、学习笔记、源码讲义、实战项目、讲解视频

如果你觉得这些内容对你有帮助,可以添加下面V无偿领取!(备注Java)
[外链图片转存中…(img-F2yAZ44X-1711163461747)]

最后

如果觉得本文对你有帮助的话,不妨给我点个赞,关注一下吧!

[外链图片转存中…(img-wYxIIMnz-1711163461748)]

[外链图片转存中…(img-JpPdB9YB-1711163461748)]

本文已被CODING开源项目:【一线大厂Java面试题解析+核心总结学习笔记+最新讲解视频+实战项目源码】收录

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值