package cn.crxy.trident;
import java.io.File;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.io.FileUtils;
import storm.trident.TridentTopology;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.spout.IBatchSpout;
import storm.trident.tuple.TridentTuple;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
/**
* trident实现单词计数
* @author 小明
*
*/
public class TridentLocalPologyWordCount {
/**
 * Batch spout that scans D:\test for *.txt files and emits every line of
 * every file as a one-field ("sentence") tuple.
 *
 * NOTE(review): the {@code batches} cache is read in emitBatch and cleared
 * in ack, but nothing ever puts a batch into it, so a failed batch cannot
 * be replayed — confirm whether replay semantics are needed here.
 */
public static class DataSource implements IBatchSpout {

    // batchId -> tuples emitted for that batch (never populated; see class note)
    HashMap<Long, List<List<Object>>> batches = new HashMap<Long, List<List<Object>>>();

    /** Batch fully processed downstream: drop any cached copy of it. */
    @Override
    public void ack(long batchId) {
        this.batches.remove(batchId);
    }

    @Override
    public void close() {
    }

    /**
     * Core logic: emit one "sentence" tuple per line of every .txt file
     * found (recursively) under D:\test, then rename each file so the
     * next emitBatch call does not read it again.
     */
    @Override
    public void emitBatch(long batchId, TridentCollector collector) {
        Collection<File> files = FileUtils.listFiles(new File("D:\\test"), new String[]{"txt"}, true);
        for (File file : files) {
            try {
                // Parse every line of this file and send it downstream.
                List<String> readLines = FileUtils.readLines(file);
                for (String line : readLines) {
                    collector.emit(new Values(line));
                }
                // Rename (append a timestamp) to prevent re-reading the file.
                FileUtils.moveFile(file, new File(file.getAbsolutePath() + System.currentTimeMillis()));
            } catch (Exception e) {
                // Best-effort: a file that cannot be read or renamed is skipped.
                e.printStackTrace();
            }
        }
    }

    @Override
    public Map getComponentConfiguration() {
        Config config = new Config();
        return config;
    }

    /** Declares the single field carried by every emitted tuple. */
    @Override
    public Fields getOutputFields() {
        return new Fields("sentence");
    }

    @Override
    public void open(Map arg0, TopologyContext arg1) {
    }
}
/**
 * Splits an incoming "sentence" tuple on tab characters and emits one
 * tuple per resulting word.
 */
public static class SpiltBolt extends BaseFunction {
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        // The upstream spout puts one whole line in field 0.
        final String sentence = tuple.getString(0);
        // Words are tab-separated; forward each one downstream.
        for (final String token : sentence.split("\t")) {
            collector.emit(new Values(token));
        }
    }
}
/**
 * Keeps an in-memory running count per word and prints the whole count
 * table to stderr after every tuple.
 *
 * NOTE(review): state lives in a plain HashMap on the function instance,
 * so counts are per-task and lost on restart — for production a Trident
 * persistent aggregate/MapState would be appropriate.
 */
public static class SumBolt extends BaseFunction {

    // word -> number of occurrences seen so far by this task
    // (unused "int sum" field removed)
    HashMap<String, Integer> map = new HashMap<String, Integer>();

    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String word = tuple.getString(0);
        // Increment this word's count, treating an unseen word as 0.
        Integer value = map.get(word);
        if (value == null) {
            value = 0;
        }
        value++;
        map.put(word, value);
        // Dump the current counts for inspection.
        System.err.println("===============================");
        for (Entry<String, Integer> entry : map.entrySet()) {
            System.err.println(entry);
        }
    }
}
/**
 * Wires the topology (spout -> split into words -> running per-word
 * count) and runs it on an in-process local cluster.
 */
public static void main(String[] args) {
    TridentTopology topology = new TridentTopology();
    topology.newStream("spout1", new DataSource())
            .each(new Fields("sentence"), new SpiltBolt(), new Fields("word"))
            .each(new Fields("word"), new SumBolt(), new Fields(""));
    // Local (in-JVM) cluster for testing; no real cluster required.
    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("trident wordcount", new Config(), topology.build());
}
import java.io.File;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.io.FileUtils;
import storm.trident.TridentTopology;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.spout.IBatchSpout;
import storm.trident.tuple.TridentTuple;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
/**
* trident实现单词计数
* @author 小明
*
*/
/**
 * Trident word count run on a local cluster: a batch spout reads lines
 * from text files, a split function breaks them into words, and a sum
 * function maintains and prints per-word counts.
 *
 * NOTE(review): this class is a byte-for-byte duplicate of the one
 * earlier in this file (an apparent copy-paste); the file will not
 * compile until one copy is removed.
 */
public class TridentLocalPologyWordCount {

    /**
     * Batch spout that scans D:\test for *.txt files and emits every
     * line of every file as a one-field ("sentence") tuple.
     *
     * NOTE(review): the {@code batches} cache is read in emitBatch and
     * cleared in ack, but nothing ever puts a batch into it, so a failed
     * batch cannot be replayed — confirm whether replay is needed.
     */
    public static class DataSource implements IBatchSpout {

        // batchId -> tuples emitted for that batch (never populated; see note)
        HashMap<Long, List<List<Object>>> batches = new HashMap<Long, List<List<Object>>>();

        /** Batch fully processed downstream: drop any cached copy of it. */
        @Override
        public void ack(long batchId) {
            this.batches.remove(batchId);
        }

        @Override
        public void close() {
        }

        /**
         * Core logic: emit one "sentence" tuple per line of every .txt
         * file found (recursively) under D:\test, then rename each file
         * so the next emitBatch call does not read it again.
         */
        @Override
        public void emitBatch(long batchId, TridentCollector collector) {
            Collection<File> files = FileUtils.listFiles(new File("D:\\test"), new String[]{"txt"}, true);
            for (File file : files) {
                try {
                    // Parse every line of this file and send it downstream.
                    List<String> readLines = FileUtils.readLines(file);
                    for (String line : readLines) {
                        collector.emit(new Values(line));
                    }
                    // Rename (append a timestamp) to prevent re-reading the file.
                    FileUtils.moveFile(file, new File(file.getAbsolutePath() + System.currentTimeMillis()));
                } catch (Exception e) {
                    // Best-effort: a file that cannot be read or renamed is skipped.
                    e.printStackTrace();
                }
            }
        }

        @Override
        public Map getComponentConfiguration() {
            Config config = new Config();
            return config;
        }

        /** Declares the single field carried by every emitted tuple. */
        @Override
        public Fields getOutputFields() {
            return new Fields("sentence");
        }

        @Override
        public void open(Map arg0, TopologyContext arg1) {
        }
    }

    /**
     * Splits an incoming "sentence" tuple on tab characters and emits
     * one tuple per resulting word.
     */
    public static class SpiltBolt extends BaseFunction {
        @Override
        public void execute(TridentTuple tuple, TridentCollector collector) {
            // The upstream spout puts one whole line in field 0.
            String line = tuple.getString(0);
            // Words are tab-separated; forward each one downstream.
            String[] words = line.split("\t");
            for (String word : words) {
                collector.emit(new Values(word));
            }
        }
    }

    /**
     * Keeps an in-memory running count per word and prints the whole
     * count table to stderr after every tuple.
     */
    public static class SumBolt extends BaseFunction {

        // word -> number of occurrences seen so far (unused "int sum" removed)
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        @Override
        public void execute(TridentTuple tuple, TridentCollector collector) {
            String word = tuple.getString(0);
            // Increment this word's count, treating an unseen word as 0.
            Integer value = map.get(word);
            if (value == null) {
                value = 0;
            }
            value++;
            map.put(word, value);
            // Dump the current counts for inspection.
            System.err.println("===============================");
            for (Entry<String, Integer> entry : map.entrySet()) {
                System.err.println(entry);
            }
        }
    }

    /**
     * Wires the topology (spout -> split -> count) and runs it on an
     * in-process local cluster.
     */
    public static void main(String[] args) {
        TridentTopology tridentTopology = new TridentTopology();
        tridentTopology.newStream("spout1", new DataSource())
                .each(new Fields("sentence"), new SpiltBolt(), new Fields("word"))
                .each(new Fields("word"), new SumBolt(), new Fields(""));
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("trident wordcount", new Config(), tridentTopology.build());
    }
}
测试数据:在 D:\test 文件夹中放入文件 aa.txt,其内容为
a d
c q q
执行结果为:
c=1
a=1
===============================
d=1
c=1
q=1
a=1
===============================
d=1
c=1
q=2
a=1