1.集群各节点挂掉的影响
nimbus挂掉对集群影响较小。
当某个supervisor挂掉后,zookeeper会通知nimbus;如果此时发现nimbus也已挂掉,则会要求重启nimbus。
当有新任务上传时,也会要求nimbus重启。
其余的时候nimbus是闲置状态。
supervisor挂掉后,zookeeper会通知nimbus,nimbus会将分配到该supervisor的任务重新分配到其他supervisor;当挂掉的supervisor重启后,会向zookeeper询问其原有任务是否已被重新分配,如果是,则杀死其本地保存的worker。
Worker挂掉时,Supervisor会重新启动这个进程。如果启动过程中仍然一直失败,并且无法向Nimbus发送心跳,Nimbus会将该Worker重新分配到其他服务器上。
2.消息的完整性
从Spout中发出的Tuple,以及基于它所产生的Tuple(例如上个例子当中Spout发出的句子,以及由句子拆分出的单词tuple等),
共同构成了一棵tuple树。当这棵tuple树发送完成,并且树当中每一条消息都被正确处理,就表明spout发送的消息被“完整处理”,即消息的完整性。每个tuple在发送时都会附带一个随机生成的64位id(tid),acker对收到的tid做异或运算,结果为0即可确认整棵tuple树已被成功处理。
注意:ack无法保证数据不被重复计算,但是可以保证数据至少被正确处理一次。
3.样例
/**
 * Spout that reads the file {@code track.log} line by line and emits each
 * line as a single-field tuple (field name {@code "log"}).
 *
 * <p>Every tuple is emitted with an incrementing integer message id, so the
 * framework can report the outcome of each tuple back through
 * {@link #ack(Object)} and {@link #fail(Object)}. This sample only logs those
 * callbacks; it does not replay failed tuples.
 */
public class MySpout implements IRichSpout{
    private static final long serialVersionUID = 1L;

    /** Message id of the most recently emitted tuple; incremented before each emit. */
    int index = 0;
    FileInputStream fis;
    InputStreamReader isr;
    BufferedReader br;
    SpoutOutputCollector collector = null;
    /** The line most recently read from the input file. */
    String str = null;

    /**
     * Reads at most one line from the input file and emits it, tagged with the
     * next message id. Does nothing once the file is exhausted or when
     * {@link #open} failed to create the reader.
     */
    @Override
    public void nextTuple() {
        if (this.br == null) {
            // open() could not create the reader; there is nothing to emit.
            return;
        }
        try {
            if ((str = this.br.readLine()) != null) {
                index++;
                // Passing a message id is what enables the ack/fail callbacks.
                collector.emit(new Values(str), index);
            }
        } catch (Exception e) {
            // Report instead of silently dropping the failure, matching the
            // error handling used by open() and close().
            e.printStackTrace();
        }
    }

    /**
     * Closes whichever of the input streams were successfully opened.
     */
    @Override
    public void close() {
        try {
            if (br != null) {
                br.close();
            }
            if (isr != null) {
                isr.close();
            }
            if (fis != null) {
                fis.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Stores the collector and opens {@code track.log} as a UTF-8 reader.
     * A failure is printed and leaves the reader unset.
     *
     * @param conf      topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used by {@link #nextTuple()} to emit tuples
     */
    @Override
    public void open(Map conf, TopologyContext context,
            SpoutOutputCollector collector) {
        try {
            this.collector = collector;
            this.fis = new FileInputStream("track.log");
            this.isr = new InputStreamReader(fis, "UTF-8");
            this.br = new BufferedReader(isr);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Declares the single output field {@code "log"}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("log"));
    }

    /** @return {@code null}; this spout supplies no component-specific configuration */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Logs the id of a tuple reported as successfully processed.
     *
     * @param msgId the message id that was passed to {@code emit}
     */
    @Override
    public void ack(Object msgId) {
        System.err.println(" [" + Thread.currentThread().getName() + "] "+ " spout ack:"+msgId.toString());
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    /**
     * Logs the id of a tuple reported as failed. The tuple is not re-emitted.
     *
     * @param msgId the message id that was passed to {@code emit}
     */
    @Override
    public void fail(Object msgId) {
        System.err.println(" [" + Thread.currentThread().getName() + "] "+ " spout fail:"+msgId.toString());
    }
}
/**
 * Bolt that receives the {@code "log"} field from upstream, prints a running
 * line count together with the second tab-separated column of the line,
 * re-emits the line anchored to the input tuple, and acks the input.
 *
 * <p>If processing throws (for example when the line has no second
 * tab-separated column), the input tuple is failed instead of acked.
 */
public class MyBolt implements IRichBolt {
    private static final long serialVersionUID = 1L;

    OutputCollector collector = null;

    /** Number of non-null lines seen by this bolt instance. */
    int num = 0;
    /** The log line taken from the tuple currently being processed. */
    String valueString = null;

    @Override
    public void cleanup() {
    }

    /**
     * Processes one tuple: logs it, emits it downstream anchored to
     * {@code input}, and acks it. Any exception raised before or during the
     * ack fails the tuple. Afterwards the method pauses for two seconds.
     *
     * @param input the incoming tuple; must carry a {@code "log"} field
     */
    @Override
    public void execute(Tuple input) {
        try {
            valueString = input.getStringByField("log");
            if (valueString != null) {
                num++;
                System.err.println(Thread.currentThread().getName()+" lines :"+num +" session_id:"+valueString.split("\t")[1]);
            }
            // Anchoring the emitted tuple to the input keeps it in the same tuple tree.
            collector.emit(input, new Values(valueString));
            collector.ack(input);
        } catch (Exception e) {
            collector.fail(input);
            e.printStackTrace();
            return;
        }

        // The pause is kept outside the processing try-block so that an
        // interrupt during the sleep cannot fail a tuple that was already acked.
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can observe it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Stores the collector used to emit, ack and fail tuples.
     *
     * @param stormConf topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector for this bolt
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Declares the single output field {@code "session_id"}.
     *
     * <p>NOTE(review): {@link #execute} emits the whole log line under this
     * field name, not just the session id column; confirm which is intended.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("session_id"));
    }

    /** @return {@code null}; this bolt supplies no component-specific configuration */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
/**
 * Entry point that wires {@code MySpout} to {@code MyBolt} and submits the
 * resulting topology.
 *
 * <p>When a command-line argument is given, it is used as the topology name
 * and the topology is submitted through {@code StormSubmitter}. Otherwise the
 * topology runs in an in-process {@code LocalCluster} under the name
 * {@code "mytopology"}.
 */
public class Main {
    /**
     * Builds and submits the topology.
     *
     * @param args optional; {@code args[0]} is the topology name for cluster submission
     */
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new MySpout(), 1);
        builder.setBolt("bolt", new MyBolt(), 2).shuffleGrouping("spout");

        Config conf = new Config();
        conf.setDebug(true);
        // Use the instance overload, consistent with the other setters here,
        // instead of calling the static overload through an instance reference.
        conf.setMessageTimeoutSecs(100);
        conf.setNumAckers(4);

        if (args.length > 0) {
            try {
                StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        } else {
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("mytopology", conf, builder.createTopology());
        }
    }
}