上一章讲解了 ack 与 fail 的使用,并遇到了由于数据拆分到多个 Bolt 而导致的数据事务不一致性问题,如何解决呢?
1. 使用 Batch 处理:比较简单,但无法享受分布式计算带来的高性能优势。
2. 手工记录 成功数据 (Message Id + Data ID) 避免重复。
程序代码:
DatabaseSimulatorDataStorage
package com.john.learn.storm.reliability.storage;
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.storm.shade.org.eclipse.jetty.util.ConcurrentHashSet;
/**
 * In-memory simulation of a persistent store that records success markers
 * (message id + data id) for already-processed tuples, so that replayed
 * tuples can be detected and skipped (manual de-duplication).
 *
 * <p>Thread-safe: the marker set is backed by a {@link ConcurrentHashMap},
 * so multiple bolt executor threads may record and query concurrently.
 */
public class DatabaseSimulatorDataStorage {

	/** Returns the process-wide singleton instance. */
	public static DatabaseSimulatorDataStorage getInstance() {
		return DATA_STORAGE;
	}

	/** Records that the tuple identified by {@code key} was fully processed. */
	public void markSuccess(String key) {
		boltSuccessMarkers.add(key);
	}

	/** Returns {@code true} if the tuple identified by {@code key} was already processed. */
	public boolean isSuccess(String key) {
		return boltSuccessMarkers.contains(key);
	}

	// Concurrent set built from the JDK instead of the shaded Jetty
	// ConcurrentHashSet: depending on org.apache.storm.shade.* internals is
	// fragile, as shaded packages can change or vanish between Storm releases.
	private final Set<String> boltSuccessMarkers = Collections
			.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

	private static final DatabaseSimulatorDataStorage DATA_STORAGE = new DatabaseSimulatorDataStorage();
}
package com.john.learn.storm.reliability.bolt;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.Map;
import java.util.Random;
import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import com.john.learn.storm.reliability.dice.DiceUtils;
import com.john.learn.storm.reliability.storage.DatabaseSimulatorDataStorage;
/**
 * Terminal bolt that writes each incoming word to a local file, using the
 * success markers in {@link DatabaseSimulatorDataStorage} to detect and skip
 * tuples that were already processed on a previous (replayed) attempt.
 *
 * <p>A deliberate divide-by-zero failure is injected for the word "Java"
 * while {@code failed} is true, to exercise Storm's ack/fail replay path.
 */
public class WriteSubjectBolt extends BaseRichBolt {

	@Override
	public void prepare(Map config, TopologyContext context, OutputCollector collector) {
		this.collector = collector;

		File file = new File("C:\\Storm\\MessageReliabilityBolt\\message.txt");
		file.getParentFile().mkdirs();

		try {
			writer = new FileWriter(file);
		} catch (IOException e) {
			// Fail fast: swallowing this exception would leave writer == null and
			// turn every subsequent execute() call into a NullPointerException.
			throw new RuntimeException("Cannot open output file: " + file.getAbsolutePath(), e);
		}
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer fieldsDeclarer) {
		// Terminal bolt: emits no downstream stream.
	}

	@Override
	public void execute(Tuple tuple) {
		String word = tuple.getStringByField("Word");
		Integer index = tuple.getIntegerByField("Index");

		// De-duplication key: bolt name + data id, recorded after a successful write.
		String marker = "WriteSubjectBolt.Success." + index + "." + word;

		try {
			if (DatabaseSimulatorDataStorage.getInstance().isSuccess(marker)) {
				// Already processed on a previous attempt - just ack the replay.
				collector.ack(tuple);
				return;
			}

			// Deliberately trigger an exception to test Storm's fault tolerance.
			if (failed && "Java".equals(word)) {
				int i = 1 / 0;
			}

			writer.write(word);
			writer.write("\r\n");
			writer.flush();

			System.out.println("[" + Thread.currentThread().getName() + "] 成功写入文件:" + word + " 第" + index + "行数据!");

			// Record the success marker BEFORE acking. If the worker dies between
			// the two steps, the tuple is replayed and then skipped via the marker.
			// Acking first could lose the marker with no replay, breaking the
			// manual de-duplication this example demonstrates.
			DatabaseSimulatorDataStorage.getInstance().markSuccess(marker);

			// Manually mark the tuple as successfully processed.
			collector.ack(tuple);
		} catch (Throwable e) {
			// Throwable on purpose: the injected ArithmeticException (and any other
			// unexpected error) must reach fail() so the spout replays the tuple.
			System.out.println("[" + Thread.currentThread().getName() + "] 写入文件失败:" + word + " 第" + index + "行数据!");

			// Randomly stop injecting failures so the topology eventually succeeds.
			if (DiceUtils.isSmall()) {
				failed = false;
			}

			// Manually mark the tuple as failed so it gets replayed.
			collector.fail(tuple);
		}
	}

	@Override
	public void cleanup() {
		try {
			writer.close();
		} catch (Exception ignored) {
			// Best-effort close on shutdown; nothing useful can be done here.
		}
	}

	private OutputCollector collector;

	// Failure-injection switch; flipped off at random after a failure.
	private boolean failed = true;

	private Writer writer = null;

	private static final long serialVersionUID = 1L;
}
运行结果:出现了多次重复处理,但 message.txt 的内容是正确的,说明数据保持了最终一致性(弱一致性)。