首先创建一个Topology主类,
然后spout是从kafka接收数据流的 KafkaSpout,
第一个bolt会从前面的spout接收数据,做一些初步的处理,传输给下一个bolt
不适合重量级的计算。
实时UI无法准确地查看数据的执行情况,准确的性能调优存在一定困难。
所以在这里创建了一个抽象类继承BaseBasicBolt ,然后其他的bolt会继承创建的这个抽象类。
可以在bolt执行前和执行后记录时间。
并记录每个数据的执行流程和各个环节bolt的执行状态和耗时。
import java.util.ArrayList;
import java.util.List;
import org.apache.zookeeper.ZooKeeper;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
/**
 * Entry point that builds the user-profile topology and submits it.
 *
 * <p>Data flow: {@code kafka_reader} (KafkaSpout on topic {@code tracker})
 * -> {@code get_usertrack} (GetTrackInfoBolt) -> {@code save_userstat} (SaveUserStatBolt).
 *
 * <p>Arguments: {@code args[0]} is the ZooKeeper connect string
 * ({@code host:port,host:port,...}) and {@code args[1]} is the Nimbus host.
 * With exactly two arguments the topology is submitted to the cluster; with more than
 * two it is run for ten seconds in an in-process {@code LocalCluster}.
 */
public class BasicTopology
{
    /**
     * Builds and submits the topology.
     *
     * @param args {@code [zkHosts, nimbusHost]} for cluster mode; any additional
     *             argument switches to local mode
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception declared for compatibility; submission errors are caught and printed
     */
    public static void main(String[] args)
        throws Exception
    {
        // The original message said "must be three" although two arguments are what is
        // required, and it threw NullPointerException for what is an argument-count error.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                "************** Topology args number must be at least two: <zkHosts> <nimbusHost>");
        }
        String zkhosts = args[0];
        String nimbusHost = args[1];
        String name = "user_profile_full_log_test";

        TopologyBuilder builder = new TopologyBuilder();
        ZkHosts zkhost = new ZkHosts(zkhosts);
        String topic = "tracker";
        String spoutId = "kafkaSpout";
        SpoutConfig spoutConfig = new SpoutConfig(zkhost, topic, "", spoutId);

        // Strip the ":port" suffix from every entry; only the host names are kept.
        List<String> zkServers = new ArrayList<String>();
        if (zkhosts != null && !zkhosts.isEmpty()) {
            for (String host : zkhosts.split(",")) {
                zkServers.add(host.split(":")[0]);
            }
        }
        spoutConfig.zkServers = zkServers;
        // NOTE(review): the port is fixed at 2181 regardless of the ports given in zkhosts.
        spoutConfig.zkPort = Integer.valueOf(2181);
        // spoutConfig.forceFromStart = true;
        spoutConfig.socketTimeoutMs = 60 * 1000;
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        String zkRoot = "/consumers/" + name + "0";
        spoutConfig.zkRoot = zkRoot;

        // Best-effort removal of the spout's ZooKeeper root node before starting.
        // Failures are only printed. The handle is now closed afterwards; previously
        // the ZooKeeper session was left open.
        ZKUtils dm = new ZKUtils();
        ZooKeeper zk = null;
        try {
            zk = dm.createZKInstance(zkhosts);
            zk.delete(zkRoot, -1);
        } catch (Exception e) {
            System.out.println("e:" + e.getMessage());
        } finally {
            if (zk != null) {
                try {
                    zk.close();
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                }
            }
        }

        builder.setSpout("kafka_reader", new KafkaSpout(spoutConfig), 1);
        builder.setBolt("get_usertrack", new GetTrackInfoBolt(), 2).shuffleGrouping("kafka_reader");
        builder.setBolt("save_userstat", new SaveUserStatBolt(), 2).shuffleGrouping("get_usertrack");
        // Wiring that used to exist here as commented-out code and is currently disabled:
        // save_userprofile (SaveUserProfileBolt), save_useraction (SaveUserActionBolt),
        // order_reader (OrderSpout) with its order_* bolts, save_usermobileprofile
        // (SaveMobileUserProfileBolt) and save_mergeuserprofile (MobileMergePcProfileBolt).

        Config conf = new Config();
        //conf.setDebug(true);
        try
        {
            if (args.length == 2) {
                // Cluster mode.
                conf.put("topology.max.spout.pending", Integer.valueOf(1024));
                conf.put(Config.STORM_ZOOKEEPER_SESSION_TIMEOUT, 60000);
                conf.put(Config.STORM_ZOOKEEPER_RETRY_TIMES, 10);
                conf.put(Config.STORM_ZOOKEEPER_RETRY_INTERVAL, 1000);
                conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 0);
                conf.setNumWorkers(12);
                conf.setMaxTaskParallelism(100);
                conf.put("nimbus.host", nimbusHost);
                conf.put("nimbus.thrift.port", Integer.valueOf(6627));
                conf.put("storm.zookeeper.servers", zkServers);
                conf.setMessageTimeoutSecs(300);
                StormSubmitter.submitTopology(name, conf, builder.createTopology());
            }
            else {
                // Local mode: reached only when more than two arguments are passed.
                System.out.println("local:" );
                UserProfileModel model = new UserProfileModel();
                String modelString = JSON.toJSONString(model);
                conf.put(UserProfileConstants.USER_PROFILE_MODEL, modelString);
                conf.setMaxTaskParallelism(1);
                LocalCluster cluster = new LocalCluster();
                cluster.submitTopology("local_user_profile", conf, builder.createTopology());
                Thread.sleep(10000);
                cluster.shutdown();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
上面是topology主类,主要是实现bolt流程的衔接,本地运行和线上环境的切换。
下面是创建的基础拦截继承类
public abstract class MonitorBaseBolt extends BaseBasicBolt {
private static final long serialVersionUID = 1L;
final public static String SPLIT_TAG = new String(new byte[] { 1 });
private static Logger log = Logger.getLogger(MonitorBaseBolt.class);
public void execute(Tuple input, BasicOutputCollector collector) {
MessageId mi = input.getMessageId();
Map<Long, Long> map = mi.getAnchorsToIds();
String uid = UUID.randomUUID().toString();
String key = getRootId(map);
MonitorLogExecutor.getInstance().put("s" + SPLIT_TAG + key + SPLIT_TAG + uid + SPLIT_TAG + super.getClass().getName() + SPLIT_TAG + new Date().getTime());
preExecute(input, collector);
MonitorLogExecutor.getInstance().put("e" + SPLIT_TAG + key + SPLIT_TAG + uid + SPLIT_TAG + super.getClass().getName() + SPLIT_TAG + new Date().getTime());
}
public String getRootId(Map<Long, Long> map) {
if(map == null || map.keySet() == null || map.keySet().isEmpty()) {
return "";
}
String result = "";
for(Long root : map.keySet()) {
result += "k" + root;
}
return result;
}
public void preExecute(Tuple input, BasicOutputCollector collector) {
}
还有执行日志存储
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.hbase.client.Put;
import com.yhd.common.hbase.UserProfileDBHelper;
/**
 * Singleton that buffers monitoring records in a bounded in-memory queue and writes
 * them to the {@code real_log} table through {@code UserProfileDBHelper}.
 *
 * <p>Records are the strings produced by {@code MonitorBaseBolt}:
 * {@code tag SPLIT_TAG rootKey SPLIT_TAG uid SPLIT_TAG boltClassName SPLIT_TAG millis}.
 */
public class MonitorLogExecutor {
    /** Pending records; capacity 1000. */
    BlockingQueue<String> queue = new LinkedBlockingQueue<String>(1000);

    public static MonitorLogExecutor executor;

    /**
     * Returns the shared instance, creating it and starting its writer thread on first use.
     *
     * <p>The original implementation called {@link #execute()} directly from here. Since
     * {@code execute()} loops forever, the first caller never returned and, because this
     * method is synchronized, every other caller blocked as well. The loop now runs on a
     * daemon thread so that callers get the instance back immediately.
     *
     * @return the singleton instance
     */
    public static synchronized MonitorLogExecutor getInstance() {
        if (executor == null) {
            final MonitorLogExecutor created = new MonitorLogExecutor();
            Thread worker = new Thread(new Runnable() {
                @Override
                public void run() {
                    created.execute();
                }
            }, "monitor-log-executor");
            // Daemon: the writer must not keep the JVM alive on shutdown.
            worker.setDaemon(true);
            worker.start();
            executor = created;
        }
        return executor;
    }

    /**
     * Drains the queue and stores each record; blocks the calling thread until it is
     * interrupted. Failures while handling a single record are printed and skipped.
     */
    public void execute() {
        while (!Thread.currentThread().isInterrupted()) {
            try {
                // take() blocks until a record is available, replacing the former
                // poll() + 5 second sleep.
                String ml = queue.take();
                // Layout: [0]=tag, [1]=rootKey, [2]=uid, [3]=boltClassName, [4]=millis
                String[] vals = ml.split(MonitorBaseBolt.SPLIT_TAG);
                if (vals.length < 5) {
                    System.err.println("malformed monitor record, field count: " + vals.length);
                    continue;
                }
                Put put = new Put(vals[1].getBytes());
                put.add("log".getBytes(), (vals[3] + MonitorBaseBolt.SPLIT_TAG + vals[2] + MonitorBaseBolt.SPLIT_TAG
                        + vals[0]).getBytes(), vals[4].getBytes());
                UserProfileDBHelper.getInstance().save(put, "real_log");
                //batch save
            } catch (InterruptedException e) {
                // Restore the flag and stop instead of swallowing the interrupt.
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Queues a record for writing without blocking. When the queue is full the record
     * is dropped.
     *
     * @param data the record string to store
     */
    public void put(String data) {
        queue.offer(data);
    }
}
然后就是第一个bolt的创建
/**
 * First bolt of the topology: splits each incoming message into tracker log lines,
 * classifies every line into a user action and emits one {@code UserActionTuple}
 * per recognised action on the single output field {@code trackInfos}.
 */
public class GetTrackInfoBolt extends MonitorBaseBolt
{
    private static final long serialVersionUID = 1L;

    /** Minimum number of tab-separated fields per line; the highest index read is 41. */
    private static final int MIN_FIELD_COUNT = 42;

    /** Number of tuples this instance has emitted so far. */
    private int trackNumber = 0;

    private static Logger log = Logger.getLogger(GetTrackInfoBolt.class);

    /**
     * Chooses the identifier used as row key: the user id when present, otherwise the guid.
     *
     * @param userId user id field; {@code null}, empty and {@code "\N"} count as absent
     * @param guId   guid field used as fallback
     * @return the chosen identifier, or {@code null} when neither value is usable
     */
    private String getRowKey(String userId, String guId) {
        if (null == userId || userId.equals("\\N") || userId.isEmpty()) {
            if (null == guId) {
                return null;
            }
            Matcher matcher = UserActionConstants.NOT_GUID_PATTERN.matcher(guId);
            if (matcher.find()) {
                log.error("invail guid:" + guId );
                return null;
            }
            userId = guId;
        }
        if (userId.equals("\\N") || userId.isEmpty() || userId.equals("null")) {
            return null;
        }
        return userId;
    }

    /**
     * Returns the device tag for a URL.
     *
     * @param url page URL; may be {@code null}
     * @return {@code UserStatConstants.MOBILE_TAG} when the URL starts with
     *         {@code UserStatConstants.MOBILE_URL_TAG}, otherwise {@code UserStatConstants.PC_TAG}
     */
    public String GetDev(String url) {
        if (null != url && url.startsWith(UserStatConstants.MOBILE_URL_TAG)) {
            return UserStatConstants.MOBILE_TAG;
        }
        return UserStatConstants.PC_TAG;
    }

    /**
     * Parses the message in field 0 of the tuple and emits one tuple per usable line.
     *
     * <p>Errors are handled per line: a line that fails to parse is logged and skipped,
     * and the remaining lines of the same message are still processed. Previously a
     * single failing line aborted the rest of the message.
     *
     * @param input     tuple whose first field is a newline-separated batch of track lines
     * @param collector collector the resulting tuples are emitted on
     */
    @Override
    public void preExecute(Tuple input, BasicOutputCollector collector)
    {
        String mesg;
        try {
            mesg = input.getString(0);
        } catch (Exception e) {
            log.error("split track wrong:" + e.toString(), e);
            return;
        }
        if (mesg == null || mesg.isEmpty()) {
            return;
        }
        for (String track : mesg.split("\n")) {
            try {
                UserActionTuple userActionTuple = parseTrack(track);
                if (userActionTuple == null) {
                    continue;
                }
                trackNumber += 1;
                collector.emit(new Values(userActionTuple));
            } catch (Exception e) {
                log.error("split track wrong:" + e.toString(), e);
            }
        }
    }

    /** Converts one raw track line into a tuple, or returns {@code null} when the line is skipped. */
    private UserActionTuple parseTrack(String track) {
        // Flume delivers tab-separated fields (test data uses byte 1 instead).
        // String.split drops trailing empty strings, so a space is appended to keep
        // trailing empty columns; cityId is trimmed below for that reason.
        String[] trackInfo = (track + " ").split("\t");
        // Lines are often sent with only 39 fields; those are logged and skipped.
        if (trackInfo.length < MIN_FIELD_COUNT) {
            // Guard the index: a line without any tab has a single element.
            String shortUrl = trackInfo.length > 1 ? trackInfo[1] : "";
            log.error(trackInfo.length + " " + shortUrl);
            return null;
        }
        String url = trackInfo[1];
        String referer = trackInfo[2];
        String guId = trackInfo[5];
        String sessionId = trackInfo[10];
        String trackTime = trackInfo[17];
        String userId = trackInfo[18];
        String productIds = trackInfo[21];
        String ieVersion = trackInfo[29];
        String platform = trackInfo[30];
        String linkPositon = trackInfo[34];
        String buttonPosition = trackInfo[35];
        String provinceId = trackInfo[38];
        String cityId = trackInfo[41].trim();
        // NOTE(review): this is logged at error level for every line; looks like debug output.
        log.error("trackTime:" + trackTime );

        // Try the product-action analyzer first, then the product-set analyzer.
        StringBuilder strProductIds = new StringBuilder( productIds );
        UserActionQualifier userActionQualifier = ProActionAnalyzer.getProductActionType(url, strProductIds,
                linkPositon, buttonPosition);
        if (null == userActionQualifier) {
            userActionQualifier = ProSetActionAnalyzer.getProductSetActionType( url );
        }
        if (null == userActionQualifier) {
            // No recognised action for this URL.
            return null;
        }

        String strKey = getRowKey(userId, guId);
        if (null == strKey) {
            return null;
        }
        // Local variable instead of the former mutable instance field.
        String strDev = GetDev( url );

        UserAction userAction = new UserAction();
        userAction.setS(sessionId);
        userAction.setP(provinceId);
        userAction.setC(cityId);
        userAction.setO(platform);
        userAction.setB(ieVersion);
        userAction.setR(referer);
        userAction.setL(linkPositon);
        userAction.setBP(buttonPosition);
        userAction.setU(url);
        userAction.setA(userActionQualifier.getActionType());

        String userKey = strDev + strKey;
        String userType = userActionQualifier.getLogType() + CommonConstants.TRACK_SPLIT
                + userActionQualifier.getActionType()
                + CommonConstants.TRACK_SPLIT + userActionQualifier.getActionObject();

        UserActionTuple userActionTuple = new UserActionTuple();
        userActionTuple.setUserKey(userKey);
        userActionTuple.setUserType(userType);
        userActionTuple.setTrackTime(trackTime);
        userActionTuple.setUserAction(userAction);
        return userActionTuple;
    }

    /**
     * Declares the single output field {@code trackInfos}.
     *
     * @param declarer the declarer to register the output fields on
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer)
    {
        declarer.declare(new Fields("trackInfos"));
    }
}
描述了大致的storm实现流程。