1、按行分组读取文件
Spout
public class AmtSpout implements IRichSpout {
    private FileInputStream fileInputStream;
    private InputStreamReader inputStreamReader;
    private BufferedReader bufferedReader;
    SpoutOutputCollector collector = null;

    /**
     * Opens the input file once and keeps the buffered reader so nextTuple()
     * can consume it one line at a time.
     */
    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        try {
            this.collector = spoutOutputCollector;
            this.fileInputStream = new FileInputStream("G:\\store_utf8.txt");
            // The file is UTF-8 encoded (see its name). The original relied on the
            // platform default charset, which on a Windows box is typically GBK and
            // would silently corrupt the data — pin the charset explicitly.
            this.inputStreamReader = new InputStreamReader(fileInputStream, java.nio.charset.StandardCharsets.UTF_8);
            this.bufferedReader = new BufferedReader(inputStreamReader);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Holds the line read by the most recent nextTuple() call.
    String str = null;

    /**
     * Emits at most one tuple per invocation, as Storm's spout contract expects:
     * reads one "id::price" line and emits (id, price). Lines that do not split
     * into exactly two fields are skipped.
     */
    @Override
    public void nextTuple() {
        try {
            if ((str = this.bufferedReader.readLine()) != null) {
                String[] arr = str.split("::");
                if (arr.length == 2) {
                    collector.emit(new Values(arr[0], arr[1]));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Declares the two output fields consumed downstream by name. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("id", "price"));
    }
    ...
}
Bolt
public class AmtBolt implements IRichBolt {
    /**
     * Prints each incoming order as "id:price". The price arrives as a string
     * field and is parsed to a Double before printing.
     */
    @Override
    public void execute(Tuple tuple) {
        final String orderId = tuple.getStringByField("id");
        final Double orderPrice = Double.parseDouble(tuple.getStringByField("price"));
        System.out.println(orderId + ":" + orderPrice);
    }
    ...
}
Topology
public class AmtTopology {
    /** Wires the spout to the bolt and submits the topology to a local cluster. */
    public static void main(String[] args) {
        final TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new AmtSpout(), 1);
        // Fields grouping: tuples sharing the same "id" are always routed to the
        // same bolt task — analogous to how a key reaches a single reducer in MR.
        builder.setBolt("bolt1", new AmtBolt(), 2).fieldsGrouping("spout", new Fields("id"));
        final Config conf = new Config();
        conf.setDebug(true);
        conf.setNumWorkers(2);
        new LocalCluster().submitTopology("tm", conf, builder.createTopology());
    }
}
2、滚动窗口
Spout和上面一致
Bolt
public class TumblingBolt extends BaseWindowedBolt {
    /**
     * Fires once per finished tumbling window: sums the "price" field of every
     * tuple in the window and prints "total:average".
     */
    @Override
    public void execute(TupleWindow tupleWindow) {
        List<Tuple> tuples = tupleWindow.get();
        // Guard: an empty window would make price / size divide 0.0 by 0 and
        // print NaN; bail out instead.
        if (tuples.isEmpty()) {
            return;
        }
        // Primitive accumulator — the original boxed Double re-allocated on
        // every += iteration; printed output is identical.
        double price = 0.0;
        for (Tuple input : tuples) {
            price += Double.parseDouble(input.getStringByField("price"));
        }
        double avg = price / tuples.size();
        System.out.println(price + ":" + avg);
    }
    ...
}
Topology
public class TumblingTopology {
    /** Builds and locally submits the tumbling-window demo topology. */
    public static void main(String[] args) {
        final TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new TumblingSpout(), 1);
        // Count-based tumbling window: the bolt fires once for every 5 tuples.
        final BaseWindowedBolt windowedBolt =
                new TumblingBolt().withTumblingWindow(new BaseWindowedBolt.Count(5));
        builder.setBolt("bolt", windowedBolt, 1).shuffleGrouping("spout");
        final Config conf = new Config();
        conf.setDebug(true);
        conf.setNumWorkers(2);
        new LocalCluster().submitTopology("tm", conf, builder.createTopology());
    }
}
3、滑动窗口
Spout和上面的AmtSpout一致(输出字段为 id、price)
Bolt
public class SlidingBolt extends BaseWindowedBolt {
    // Running total of the prices currently inside the sliding window; carried
    // across execute() calls and updated incrementally via getNew()/getExpired().
    Double amt = 0.0;

    /**
     * Maintains the window sum incrementally: add tuples that entered the window
     * since the last trigger, subtract tuples that fell out, then print the total.
     */
    @Override
    public void execute(TupleWindow tupleWindow) {
        List<Tuple> newTuple = tupleWindow.getNew();
        List<Tuple> expiredTuple = tupleWindow.getExpired();
        // Tuples newly added to the window.
        // BUG FIX: the spout declares fields ("id", "price"); reading "amt" would
        // throw on the first tuple because no such field exists.
        for (Tuple tuple : newTuple) {
            amt += Double.parseDouble(tuple.getStringByField("price"));
        }
        // Tuples that expired out of the window.
        for (Tuple tuple : expiredTuple) {
            amt -= Double.parseDouble(tuple.getStringByField("price"));
        }
        // Print the current window total.
        System.out.println("近十个订单金额为:" + amt);
    }
}
Topology
public class SlidingTopology {
    /** Builds and locally submits the sliding-window demo topology. */
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new SlidingSpout(), 1);
        // Sliding window: length 10 tuples, sliding interval 5 tuples — every 5
        // tuples the bolt sees the latest 10, matching the stated intent and the
        // bolt's "last ten orders" output. (The original passed Count(5), Count(5),
        // which degenerates into a tumbling window of 5.)
        builder.setBolt("bolt",
                new SlidingBolt().withWindow(new BaseWindowedBolt.Count(10), new BaseWindowedBolt.Count(5)),
                1).shuffleGrouping("spout");
        Config conf = new Config();
        conf.setDebug(true);
        conf.setNumWorkers(2);
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("tmall", conf, builder.createTopology());
    }
}
4、ACK机制
Spout
public class AckSpout implements IRichSpout {
    private FileInputStream fileInputStream = null;
    private InputStreamReader inputStreamReader = null;
    private BufferedReader bufferedReader = null;
    // Thread-safe map of in-flight tuples keyed by message id, so fail() can re-emit.
    private ConcurrentHashMap<Object, Values> _pending;
    // Per-message failure counter used to cap the number of retries at 3.
    private ConcurrentHashMap<Object, Integer> fail_pending;
    private SpoutOutputCollector collector;

    /** Opens the input file and initializes the in-flight / retry bookkeeping maps. */
    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        try {
            this.collector = spoutOutputCollector;
            this.fileInputStream = new FileInputStream("xxx\\test.txt");
            this.inputStreamReader = new InputStreamReader(fileInputStream);
            this.bufferedReader = new BufferedReader(inputStreamReader);
            this._pending = new ConcurrentHashMap<>();
            this.fail_pending = new ConcurrentHashMap<>();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Holds the line read by the most recent nextTuple() call.
    String str = null;

    /**
     * Emits at most ONE tuple per invocation. Storm calls nextTuple() in a loop on
     * the spout thread; the original while-loop emitted the entire file in a single
     * call, starving ack()/fail() processing until every line had been read.
     */
    @Override
    public void nextTuple() {
        try {
            // Input line format: "<id>\t<price>", e.g. "1\t30".
            if ((str = this.bufferedReader.readLine()) != null) {
                System.out.println("start read");
                UUID msgid = UUID.randomUUID();
                String arr[] = str.split("\t");
                Values values = new Values(arr[0], arr[1]);
                // Cache the tuple BEFORE emitting so fail() can always find it.
                this._pending.put(msgid, values);
                collector.emit(values, msgid);
                System.out.println("_pending.size():" + this._pending.size());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Called when the whole tuple tree succeeded: drop it from the in-flight map. */
    @Override
    public void ack(Object msgId) {
        System.out.println("_pending size 共有:" + _pending.size());
        System.out.println("spout ack:" + msgId.toString() + "--" + msgId.getClass());
        this._pending.remove(msgId);
        System.out.println("_pending size 剩余:" + _pending.size());
    }

    /** Called on tuple failure/timeout: re-emit up to 3 times, then give up. */
    @Override
    public void fail(Object msgId) {
        System.out.println("spout fail:" + msgId.toString());
        Integer fail_count = fail_pending.get(msgId); // failures so far for this tuple
        if (fail_count == null) {
            fail_count = 0;
        }
        fail_count++;
        if (fail_count >= 3) {
            // Retries exhausted — stop re-emitting. Also drop the cached tuple from
            // _pending: the original left it there forever, leaking an entry for
            // every permanently failed tuple.
            fail_pending.remove(msgId);
            this._pending.remove(msgId);
        } else {
            // Record the new failure count for this tuple.
            fail_pending.put(msgId, fail_count);
            // Re-emit the cached tuple with the same message id.
            this.collector.emit(this._pending.get(msgId), msgId);
        }
    }

    /** Declares the two output fields consumed downstream by name. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("id", "price"));
    }
    ...
}
Bolt
public class AckBolt implements IRichBolt {
    private OutputCollector collector = null;

    /** Keeps the collector so execute() can emit, ack, and fail tuples. */
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
    }

    /**
     * Forwards (id, price) downstream as an ANCHORED tuple (so it stays in the
     * ack tree), then acks the input. Any exception fails the tuple, letting the
     * spout's fail() handler retry it.
     */
    @Override
    public void execute(Tuple tuple) {
        try {
            final String orderId = tuple.getStringByField("id");
            final String orderPrice = tuple.getStringByField("price");
            System.out.println(orderId + ":" + orderPrice);
            collector.emit(tuple, new Values(orderId, orderPrice));
            collector.ack(tuple);
        } catch (Exception e) {
            collector.fail(tuple);
            e.printStackTrace();
        }
    }

    /** Declares the fields re-emitted to the next bolt. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("id", "price"));
    }
    ...
}
Bolt2
public class AckBolt2 implements IRichBolt {
    private OutputCollector collector = null;

    /** Keeps the collector so execute() can ack/fail tuples. */
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
    }

    /**
     * Terminal bolt: parses the price, prints "id:price", and acks on success.
     * On any error it fails the tuple so the whole tree is replayed from the spout.
     */
    @Override
    public void execute(Tuple tuple) {
        try {
            final String orderId = tuple.getStringByField("id");
            final Double orderPrice = Double.parseDouble(tuple.getStringByField("price"));
            System.out.println(orderId + ":" + orderPrice);
            collector.ack(tuple);
        } catch (Exception e) {
            collector.fail(tuple);
            e.printStackTrace();
        }
    }
    ...
}
Topology
public class AckTopology {
    /** Chains spout -> bolt -> bolt2 and submits the ack demo to a local cluster. */
    public static void main(String[] args) {
        final TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new AckSpout(), 1);
        builder.setBolt("bolt", new AckBolt(), 1).shuffleGrouping("spout");
        builder.setBolt("bolt2", new AckBolt2(), 1).shuffleGrouping("bolt");
        final Config conf = new Config();
        conf.setDebug(true);
        conf.setNumWorkers(2);
        new LocalCluster().submitTopology("tma", conf, builder.createTopology());
    }
}