storm引入zookeeper锁控制线程操作

storm两类统计逻辑

汇总型:如网站pv,销售额,订单数等

去重:网站UV,顾客数,销售商品数等


汇总型方案:

1,shuffleGrouping下,总pv ≈ pv(单线程结果)*executor并发数(shuffleGrouping会把tuple近似平均地分发给各个executor)

一个executor默认一个task,如果设置task数大于1,公式应该是:

pv(单线程结果)*task数

同一个executor下task的线程id相同,但是taskid不同


优点:简单,计算量小

缺点:稍有误差,但绝大多数场景能接受


优化:

案例PVBolt中每个task都会输出一个汇总量,而实际只需要一个task输出总值。这里利用zookeeper锁保证只有一个task输出汇总数,并且每5秒输出一次。


2,bolt1进行多并发局部汇总,bolt2单线程进行全局汇总

优点:1,绝对准确;2,如果用fieldsGrouping可以得到中间值,如单个user的访问PV(即访问深度,也是有用的指标)

缺点:计算量稍大,且多一个bolt


生产数据:

package base;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

import java.util.Map;
import java.util.Queue;
import java.util.Random;
import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * Created by Administrator on 2016/10/7.
 *
 * <p>Data-source spout that fabricates a fixed number of tab-separated access-log
 * lines ({@code host \t session_id \t time}) when it is opened and then emits
 * them one at a time on the single output field {@code "log"}.
 *
 * <p>Tuples are emitted without a message id, so Storm does not track them and
 * {@link #ack(Object)} / {@link #fail(Object)} only log whatever they are given.
 */
public class SourceSpout implements IRichSpout{

    private static final long serialVersionUID = 1L;

    /** Number of log lines generated in {@link #open}. */
    private static final int LOG_LINE_COUNT = 100;

    /** Pause after every {@link #nextTuple()} call, in milliseconds. */
    private static final long EMIT_INTERVAL_MS = 200;

    /** Generated log lines waiting to be emitted. */
    Queue<String> queue = new ConcurrentLinkedQueue<String>();
    SpoutOutputCollector collector = null;
    /** Not used by this class; kept so the set of fields is unchanged. */
    String str = null;

    /**
     * Stores the collector and fills the queue with {@value #LOG_LINE_COUNT}
     * random log lines.
     *
     * @param map                  topology configuration (unused)
     * @param topologyContext      task context (unused)
     * @param spoutOutputCollector collector used later by {@link #nextTuple()}
     */
    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        this.collector = spoutOutputCollector;
        Random random = new Random();
        String[] hosts = {"www.taobao.com"};
        String[] session_id = { "ABYH6Y4V4SCVXTG6DPB4VH9U123", "XXYH6YCGFJYERTT834R52FDXV9U34", "BBYH61456FGHHJ7JL89RG5VV9UYU7",
                "CYYH6Y2345GHI899OFG4V9U567", "VVVYH6Y4V4SFXZ56JIPDPB4V678" };
        String[] time = { "2014-01-07 08:40:50", "2014-01-07 08:40:51", "2014-01-07 08:40:52", "2014-01-07 08:40:53",
                "2014-01-07 09:40:49", "2014-01-07 10:40:49", "2014-01-07 11:40:49", "2014-01-07 12:40:49" };
        for (int i = 0; i < LOG_LINE_COUNT; i++) {
            // Bounds come from the arrays themselves so adding samples cannot go out of sync.
            queue.add(hosts[0] + "\t" + session_id[random.nextInt(session_id.length)]
                    + "\t" + time[random.nextInt(time.length)]);
        }
    }

    @Override
    public void close() {

    }

    @Override
    public void activate() {

    }

    @Override
    public void deactivate() {

    }

    /**
     * Emits the next queued log line, if any, then pauses for
     * {@value #EMIT_INTERVAL_MS} ms.
     *
     * <p>Nothing is emitted once the queue is drained; the previous
     * {@code queue.size() >= 0} guard was always true and kept emitting
     * {@code null} values after the generated lines ran out.
     */
    @Override
    public void nextTuple() {
        String line = queue.poll();
        if (line != null) {
            collector.emit(new Values(line));
        }
        try {
            Thread.sleep(EMIT_INTERVAL_MS);
        } catch (InterruptedException e) {
            // Preserve the interrupt so the calling thread can react to it.
            Thread.currentThread().interrupt();
        }
    }

    /** Logs the acknowledged message id. */
    @Override
    public void ack(Object o) {
        // String concatenation tolerates a null id, unlike o.toString().
        System.out.println("spout ack:" + o);

    }

    /** Logs the failed message id. */
    @Override
    public void fail(Object o) {
        System.out.println("spout fail:" + o);

    }

    /** Declares the single output field {@code "log"}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("log"));
    }

    /** @return {@code null}: no component-specific configuration */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

处理数据:

package visits;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import backtype.storm.utils.ZookeeperAuthInfo;
import org.apache.http.conn.util.InetAddressUtils;
import org.apache.zookeeper.*;

import java.net.InetAddress;
import java.util.Map;

/**
 * Created by Administrator on 2016/10/6.
 * storm kill作业
 * storm kill PvTopo
 * storm提交作业
 * storm jar ./starter.jar visits.PvTopo PvTopo
 *
 */
public class PVBolt implements IRichBolt {


    /*
    这种irichbolt形式就是成功的时候要显性的调ack方法
    失败的时候掉fail方法
     */
    private static final long serialVersionUID = 1L;

    /*
    执行前需要在zookeeper上把目录建立一下
    zkCli.sh -server localhost:2181
    ls /
    create /lock ""
    create /lock/storm ""
    ls /lock
    这样就创建好了
     */
    public static final String zk_path = "/lock/storm/pv";
    ZooKeeper zKeeper = null;
    String lockData = null;

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        try{
            zKeeper = new ZooKeeper("192.168.1.107:2181,192.168.1.108:2181",3000,new Watcher(){

                @Override
                public void process(WatchedEvent watchedEvent) {
                    System.out.println("event:"+watchedEvent.getType());
                }
            });
            while (zKeeper.getState() != ZooKeeper.States.CONNECTED){
                Thread.sleep(1000);
            }

            InetAddress address = InetAddress.getLocalHost();
            //ip地址和taskip的组合肯定是唯一的
            lockData = address.getHostAddress() + ":" + topologyContext.getThisTaskId();
            //false的意思是不放监听上去
            if(zKeeper.exists(zk_path, false) == null){
                zKeeper.create(zk_path, lockData.getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
            }

        }catch(Exception e){
            try {
                zKeeper.close();
            } catch (InterruptedException e1) {
                e1.printStackTrace();
            }

        }
    }

    String logString = null;
    String sessionid = null;
    //static long Pv = 0;
    long Pv = 0;
    long beginTime = System.currentTimeMillis();
    long endTimes = 0;
    @Override
    public void execute(Tuple tuple) {
        try{
            endTimes =  System.currentTimeMillis();
            logString = tuple.getString(0);
            if (logString != null){
                sessionid = logString.split("\t")[1];

       /* *//*
        这种多线程下做计算我们还必须得有synchronized使它线程安全
        这样肯定就和单线程一样
        然而还是不够健全,因为synchronized和lock在单jvm下有效,单在多jvm下无效

         *//*
        synchronized (this){
            if(sessionid != null){
                Pv ++;
            }
        }*/

                //shuffleGrouping下,pv* Executor并发数就是统计的pv
                //因为shufflegrouping是平均分配,而我们有两个线程
                //
                if(sessionid != null){
                    Pv ++;
                }



            }

            if (endTimes - beginTime >= 5*1000){
                System.err.println(lockData+"=========================");
                if(lockData.equals(zKeeper.getData(zk_path, false, null))){
                    System.out.println("pv ================== "+ Pv * 4);
                }
                beginTime = System.currentTimeMillis();
            }


        }catch(Exception e){
            e.printStackTrace();
        }


    }

    @Override
    public void cleanup() {

    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

执行main:

package visits;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import base.SourceSpout;
import myfirst.MySpout;

import java.util.HashMap;
import java.util.Map;

public class PvTopo {

    /**
     * Wires the PV topology and submits it.
     *
     * <p>One {@code SourceSpout} executor feeds four {@code PVBolt} executors via
     * shuffle grouping, and a single {@code PVSumBolt} executor subscribes to
     * the bolt. A global total cannot be computed directly inside the concurrent
     * bolt, hence the single-executor summing stage.
     *
     * <p>NOTE(review): {@code PVSumBolt} is not shown alongside this class; confirm
     * that the upstream bolt actually declares and emits a stream for it.
     *
     * @param args when non-empty, {@code args[0]} is the topology name used for a
     *             cluster submission; when empty, the topology runs in a
     *             {@code LocalCluster} under the name {@code "mytopology"}
     */
    public static void main(String[] args) {
        TopologyBuilder topology = new TopologyBuilder();

        topology.setSpout("spout", new SourceSpout(), 1);
        topology.setBolt("bolt", new PVBolt(), 4).shuffleGrouping("spout");
        // With a single executor downstream, the choice of grouping makes no difference.
        topology.setBolt("sumBolt", new PVSumBolt(), 1).shuffleGrouping("bolt");

        Map settings = new HashMap();
        settings.put(Config.TOPOLOGY_WORKERS, 4);

        // No arguments: run in-process.
        if (args.length <= 0) {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("mytopology", settings, topology.createTopology());
            return;
        }

        // Otherwise submit to the cluster under the given name.
        try {
            StormSubmitter.submitTopology(args[0], settings, topology.createTopology());
        } catch (AlreadyAliveException e) {
            e.printStackTrace();
        } catch (InvalidTopologyException e) {
            e.printStackTrace();
        }
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值