需求
使用Trident统计每个用户(IP)使用过的浏览器数量
通过DRPC查看结果
开发过程
实现IBatchSpout批量读取日志文件
/**
 * Trident batch spout that turns every line of the {@code *.log} files found
 * (recursively) under the relative directory {@code logs/} into a single-value tuple.
 *
 * <p>Each batch is cached by its batch id until {@link #ack(long)} is called, so a
 * later {@link #emitBatch(long, TridentCollector)} call with the same id re-emits
 * exactly the same tuples instead of scanning the directory again.
 *
 * <p>After a file has been read successfully it is renamed by appending
 * {@code .done.<currentTimeMillis>} so that later batches do not pick it up again.
 */
public class MyBatchSpout implements IBatchSpout {
    /** Output fields declared for the tuples this spout emits. */
    Fields fields;

    /** Batches that have been built but not yet acknowledged, keyed by batch id. */
    HashMap<Long, List<List<Object>>> batches = new HashMap<Long, List<List<Object>>>();

    /**
     * @param fields the output fields returned by {@link #getOutputFields()}
     */
    public MyBatchSpout(Fields fields) {
        this.fields = fields;
    }

    /** No initialisation is required; the log directory is scanned lazily per batch. */
    @Override
    public void open(Map map, TopologyContext topologyContext) {
    }

    /**
     * Emits the tuples of the batch identified by {@code batchId}.
     *
     * <p>If the batch is not cached yet it is built from the pending log files and
     * cached, even when it turns out to be empty, so that the content associated with
     * a batch id never changes between calls.
     *
     * @param batchId          id of the batch to emit
     * @param tridentCollector collector that receives every tuple of the batch
     */
    @Override
    public void emitBatch(long batchId, TridentCollector tridentCollector) {
        List<List<Object>> batch = this.batches.get(Long.valueOf(batchId));
        if (null == batch) {
            batch = readPendingLogFiles();
            // Cache once, outside the per-file loop, so an empty batch is remembered too.
            this.batches.put(Long.valueOf(batchId), batch);
        }
        for (List<Object> list : batch) {
            tridentCollector.emit(list);
        }
    }

    /**
     * Reads every pending {@code *.log} file below {@code logs/} and returns one tuple per line.
     * Files that were read successfully are marked as done; files that could not be
     * read are left untouched so they can be retried by a later batch.
     */
    private List<List<Object>> readPendingLogFiles() {
        List<List<Object>> batch = new ArrayList<List<Object>>();
        File logDir = new File("logs/");
        if (!logDir.isDirectory()) {
            // Nothing to read yet; return an empty batch rather than failing the spout.
            return batch;
        }
        Collection<File> listFiles = FileUtils.listFiles(logDir, new String[]{"log"}, true);
        for (File f : listFiles) {
            List<String> readLines;
            try {
                readLines = FileUtils.readLines(f);
            } catch (IOException e) {
                // Do not mark the file as done: its lines were not emitted.
                System.err.println("Failed to read log file " + f.getAbsolutePath());
                e.printStackTrace();
                continue;
            }
            for (String line : readLines) {
                batch.add(new Values(line));
            }
            markDone(f);
        }
        return batch;
    }

    /**
     * Renames a processed file by appending {@code .done.<timestamp>} to avoid reading it again.
     * The rename is best-effort: a failure is reported but does not abort the batch.
     * NOTE(review): if the rename fails the file will be read again by a later batch.
     */
    private void markDone(File f) {
        try {
            File srcFile = f.getAbsoluteFile();
            File destFile = new File(srcFile + ".done." + System.currentTimeMillis());
            FileUtils.moveFile(srcFile, destFile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Drops the cached tuples of an acknowledged batch. */
    @Override
    public void ack(long batchId) {
        this.batches.remove(Long.valueOf(batchId));
    }

    /** Nothing to release. */
    @Override
    public void close() {
    }

    /**
     * @return a component configuration that limits this spout to a maximum task parallelism of 1
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        Config conf = new Config();
        conf.setMaxTaskParallelism(1);
        return conf;
    }

    /** @return the fields passed to the constructor */
    @Override
    public Fields getOutputFields() {
        return this.fields;
    }
}
实现BaseFunction,从每行日志中分割出IP地址和浏览器信息,合并成一个新字段IP_browser
/**
 * Trident function that derives an {@code IP_browser} value from one raw log line.
 *
 * <p>The line is split on double quotes. The IP is the first space-separated token of
 * the first segment and the browser is the segment at index 5. The emitted value is
 * {@code IP + "_" + browser}. Lines whose browser segment is {@code "-"} emit nothing.
 * NOTE(review): the indices presumably match an access-log format whose third quoted
 * field is the user agent; confirm against the actual log files.
 *
 * <p>Lines that are null or do not contain the expected segments are skipped instead
 * of throwing, so a single malformed line cannot fail the whole batch.
 */
public static class Split extends BaseFunction {
    /** Index of the quote-delimited segment that holds the browser string. */
    private static final int BROWSER_SEGMENT = 5;

    /**
     * @param tridentTuple     input tuple whose first value is the raw log line
     * @param tridentCollector collector that receives the single {@code IP_browser} value
     */
    @Override
    public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
        String line = tridentTuple.getString(0);
        if (line == null) {
            return;
        }
        String[] words = line.split("\"");
        if (words.length <= BROWSER_SEGMENT) {
            // Malformed line: fewer quote-delimited segments than expected.
            return;
        }
        String[] head = words[0].split(" ");
        if (head.length == 0) {
            // First segment consists only of spaces, so there is no IP token.
            return;
        }
        String ip = head[0];
        String browser = words[BROWSER_SEGMENT];
        if (!browser.equals("-")) {
            tridentCollector.emit(new Values(ip + "_" + browser));
        }
    }
}
实现BaseAggregator,对IP_browser去重后只发送新字段IP(数量由后续的Count聚合统计)
/**
 * Trident aggregator that collects the distinct IP prefixes of the
 * {@code IP_browser} values it receives and emits each distinct IP once.
 *
 * <p>The aggregation state is a map used as a set: the key is the IP (the text
 * before the first underscore of {@code IP_browser}) and the value is always 1.
 */
public static class Aggregate extends BaseAggregator<Map<String,Integer>> {

    /** Creates the empty state for a new aggregation. */
    @Override
    public Map<String,Integer> init(Object o, TridentCollector tridentCollector) {
        Map<String,Integer> seenIps = new HashMap<String,Integer>();
        return seenIps;
    }

    /** Records the IP part of the tuple's {@code IP_browser} field in the state. */
    @Override
    public void aggregate(Map<String,Integer> map, TridentTuple tridentTuple, TridentCollector tridentCollector) {
        String ip = tridentTuple.getStringByField("IP_browser").split("_")[0];
        map.put(ip, 1);
    }

    /** Emits one single-value tuple per IP recorded in the state. */
    @Override
    public void complete(Map<String,Integer> map, TridentCollector tridentCollector) {
        for (String ip : map.keySet()) {
            tridentCollector.emit(new Values(ip));
        }
    }
}
创建TridentTopology,提供用于构建Trident实时计算程序的一些接口
创建一个TridentState对象,通过newStream方法从指定的Spout创建一个新的数据输入流
日志拆分合并新字段->聚合去重复->count统计数量
创建DRPC流,函数命名为browserFunction
/**
 * Builds the Trident topology.
 *
 * <p>Processing stream: lines from {@code MyBatchSpout} are mapped to
 * {@code IP_browser} by {@code Split}, grouped by {@code IP_browser} and reduced to
 * {@code IP} by {@code Aggregate}, then grouped by {@code IP} and counted into an
 * in-memory map state.
 *
 * <p>DRPC stream: the function {@code browserFunction} looks up its {@code args}
 * value in that state with {@code MapGet} and filters out null counts.
 *
 * @param drpc the local DRPC server the {@code browserFunction} stream is attached to
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    final Fields lineField = new Fields("line");
    final Fields ipBrowserField = new Fields("IP_browser");
    final Fields ipField = new Fields("IP");
    final Fields countField = new Fields("count");

    final TridentTopology tridentTopology = new TridentTopology();
    final MyBatchSpout logSpout = new MyBatchSpout(lineField);

    // Log lines -> IP_browser -> distinct IP per group -> persistent count per IP.
    final TridentState ipCounts = tridentTopology
            .newStream("spout", logSpout)
            .parallelismHint(4)
            .each(lineField, new Split(), ipBrowserField)
            .parallelismHint(4)
            .groupBy(ipBrowserField)
            .partitionAggregate(ipBrowserField, new Aggregate(), ipField)
            .toStream()
            .parallelismHint(4)
            .groupBy(ipField)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // DRPC query: look the request argument up in the count state, dropping misses.
    tridentTopology
            .newDRPCStream("browserFunction", drpc)
            .stateQuery(ipCounts, new Fields("args"), new MapGet(), countField)
            .each(countField, new FilterNull());

    return tridentTopology.build();
}
本地模式运行DRPC
Config conf = new Config();
conf.setMaxSpoutPending(20);
if (args.length == 0) {
LocalDRPC drpc = new LocalDRPC();
//创建LocalDRPC对象在进程内模拟一个DRPC服务器(类似于LocalCluster在进程内模拟一个Storm集群)
LocalCluster cluster = new LocalCluster();
//创建LocalCluster对象在本地模式运行topology
cluster.submitTopology("wordCounter", conf, buildTopology(drpc));
for (int i = 0; i < 100; i++) {
System.out.println("DRPC RESULT: " + drpc.execute("browserFunction", "114.112.141.6"));
Thread.sleep(1000);
}