【网站点击流实时数据分析】04-Storm程序开发

1、Storm程序整体目录

2、bloom

/**
 * Describe: bloomFilter的java实现
 * 开源地址:https://github.com/maoxiangyi/Java-BloomFilter
 */
public class BloomFilter<E> implements Serializable {
    private BitSet bitset;
    private int bitSetSize;
    private double bitsPerElement;
    private int expectedNumberOfFilterElements; // 预计添加到过滤器中的最大数量
    private int numberOfAddedElements; //实际添加到过滤器中的数据量
    private int k; // 通过hash的次数

    static final Charset charset = Charset.forName("UTF-8"); // 用来存储hash值的字符串的编码格式

    static final String hashName = "MD5"; // 通过MD5进行加密后使用hash算法,满足大多数的情况下
    static final MessageDigest digestFunction;
    static { // The digest method is reused between instances
        MessageDigest tmp;
        try {
            tmp = MessageDigest.getInstance(hashName);
        } catch (NoSuchAlgorithmException e) {
            tmp = null;
        }
        digestFunction = tmp;
    }

    /**
     * 构造一个空的布隆过滤器,它的总长度是c *n
     * @param c 每个元素使用的byte数
     * @param n 预计过滤器将包含的元素个数
     * @param k 使用hash函数的数量
     */
    public BloomFilter(double c, int n, int k) {
        this.expectedNumberOfFilterElements = n;
        this.k = k;
        this.bitsPerElement = c;
        this.bitSetSize = (int)Math.ceil(c * n);
        numberOfAddedElements = 0;
        this.bitset = new BitSet(bitSetSize);
    }

    /**
     * 构造一个空的布隆过滤器,哈希函数的最优次数根据布隆过滤器总的大小和预期最大的元素。
     *
     * @param bitSetSize 定义总过有多少个byte位被布隆过滤器使用
     * @param expectedNumberOElements 定义布隆过滤器将要存放的最大元素个数
     */
    public BloomFilter(int bitSetSize, int expectedNumberOElements) {
        this(bitSetSize / (double)expectedNumberOElements,
                expectedNumberOElements,
                (int) Math.round((bitSetSize / (double)expectedNumberOElements) * Math.log(2.0)));
    }

    /**
     * 构造一个空的过滤器,并设置一个命中精度。
     * 自动根据设置的命中精度来预估每个元素的比特数和最大的hash数
     *
     * @param falsePositiveProbability 手动设置一个命中的精度
     * @param expectedNumberOfElements 定义布隆过滤器将要存放的最大元素个数
     */
    public BloomFilter(double falsePositiveProbability, int expectedNumberOfElements) {
        this(Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2))) / Math.log(2), // c = k / ln(2)
                expectedNumberOfElements,
                (int)Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2)))); // k = ceil(-log_2(false prob.))
    }

    /**
     * 构造一个布隆过滤器,在已经存在的数据集上。
     * @param bitSetSize 定义总过有多少个byte位被布隆过滤器使用
     * @param expectedNumberOfFilterElements 定义布隆过滤器将要存放的最大元素个数
     * @param actualNumberOfFilterElements 定义还将有多少个元素插入到已经已经存在的bloomFilter数据集上
     * @param filterData 定义已经存在的数据集
     */
    public BloomFilter(int bitSetSize, int expectedNumberOfFilterElements, int actualNumberOfFilterElements, BitSet filterData) {
        this(bitSetSize, expectedNumberOfFilterElements);
        this.bitset = filterData;
        this.numberOfAddedElements = actualNumberOfFilterElements;
    }

    /**
     * Generates a digest based on the contents of a String.
     *
     * @param val 指定输入的数据
     * @param charset 指定编码格式
     * @return digest as long.
     */
    public static int createHash(String val, Charset charset) {
        return createHash(val.getBytes(charset));
    }

    /**
     * Generates a digest based on the contents of a String.
     *
     * @param val 指定输入的数据,默认的编码格式是 UTF-8.
     * @return digest as long.
     */
    public static int createHash(String val) {
        return createHash(val, charset);
    }

    /**
     * Generates a digest based on the contents of an array of bytes.
     *
     * @param data 指定输入的数据,数据是byte数组
     * @return digest as long.
     */
    public static int createHash(byte[] data) {
        return createHashes(data, 1)[0];
    }

    /**
     * 将一个字节数据分成四个字节,每个字节生成一个整数存放在数组中
     *
     * digest function is called until the required number of int's are produced.
     * For each call to digest a salt is prepended to the data. The salt is increased by 1 for each call.
     *
     * @param data 指定输入数据
     * @param hashes 需要hash的次数
     * @return array 通过hash之后产生的int数量
     */
    public static int[] createHashes(byte[] data, int hashes) {
        int[] result = new int[hashes];
        int k = 0;
        byte salt = 0;
        while (k < hashes) {
            byte[] digest;
            synchronized (digestFunction) {
                digestFunction.update(salt);
                salt++;
                digest = digestFunction.digest(data);
            }

            for (int i = 0; i < digest.length/4 && k < hashes; i++) {
                int h = 0;
                for (int j = (i*4); j < (i*4)+4; j++) {
                    h <<= 8;
                    h |= ((int) digest[j]) & 0xFF;
                }
                result[k] = h;
                k++;
            }

        }
        System.out.println(result);
        return result;
    }

    /**
     * Compares the contents of two instances to see if they are equal.
     *
     * @param obj is the object to compare to.
     * @return True if the contents of the objects are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final BloomFilter<E> other = (BloomFilter<E>) obj;
        if (this.expectedNumberOfFilterElements != other.expectedNumberOfFilterElements) {
            return false;
        }
        if (this.k != other.k) {
            return false;
        }
        if (this.bitSetSize != other.bitSetSize) {
            return false;
        }
        if (this.bitset != other.bitset && (this.bitset == null || !this.bitset.equals(other.bitset))) {
            return false;
        }
        return true;
    }

    /**
     * Calculates a hash code for this class.
     * @return hash code representing the contents of an instance of this class.
     */
    @Override
    public int hashCode() {
        int hash = 7;
        hash = 61 * hash + (this.bitset != null ? this.bitset.hashCode() : 0);
        hash = 61 * hash + this.expectedNumberOfFilterElements;
        hash = 61 * hash + this.bitSetSize;
        hash = 61 * hash + this.k;
        return hash;
    }

    /**
     * Calculates the expected probability of false positives based on
     * the number of expected filter elements and the size of the Bloom filter.
     * <br /><br />
     * The value returned by this method is the <i>expected</i> rate of false
     * positives, assuming the number of inserted elements equals the number of
     * expected elements. If the number of elements in the Bloom filter is less
     * than the expected value, the true probability of false positives will be lower.
     *
     * @return expected probability of false positives.
     */
    public double expectedFalsePositiveProbability() {
        return getFalsePositiveProbability(expectedNumberOfFilterElements);
    }

    /**
     * Calculate the probability of a false positive given the specified
     * number of inserted elements.
     *
     * @param numberOfElements number of inserted elements.
     * @return probability of a false positive.
     */
    public double getFalsePositiveProbability(double numberOfElements) {
        // (1 - e^(-k * n / m)) ^ k
        return Math.pow((1 - Math.exp(-k * (double) numberOfElements
                / (double) bitSetSize)), k);

    }

    /**
     * Get the current probability of a false positive. The probability is calculated from
     * the size of the Bloom filter and the current number of elements added to it.
     *
     * @return probability of false positives.
     */
    public double getFalsePositiveProbability() {
        return getFalsePositiveProbability(numberOfAddedElements);
    }


    /**
     * Returns the value chosen for K.<br />
     * <br />
     * K is the optimal number of hash functions based on the size
     * of the Bloom filter and the expected number of inserted elements.
     *
     * @return optimal k.
     */
    public int getK() {
        return k;
    }

    /**
     * Sets all bits to false in the Bloom filter.
     */
    public void clear() {
        bitset.clear();
        numberOfAddedElements = 0;
    }

    /**
     * Adds an object to the Bloom filter. The output from the object's
     * toString() method is used as input to the hash functions.
     *
     * @param element is an element to register in the Bloom filter.
     */
    public void add(E element) {
        add(element.toString().getBytes(charset));
    }

    /**
     * Adds an array of bytes to the Bloom filter.
     *
     * @param bytes array of bytes to add to the Bloom filter.
     */
    public void add(byte[] bytes) {
        int[] hashes = createHashes(bytes, k);
        for (int hash : hashes)
            bitset.set(Math.abs(hash % bitSetSize), true);
        numberOfAddedElements ++;
    }

    /**
     * Adds all elements from a Collection to the Bloom filter.
     * @param c Collection of elements.
     */
    public void addAll(Collection<? extends E> c) {
        for (E element : c)
            add(element);
    }

    /**
     * Returns true if the element could have been inserted into the Bloom filter.
     * Use getFalsePositiveProbability() to calculate the probability of this
     * being correct.
     *
     * @param element element to check.
     * @return true if the element could have been inserted into the Bloom filter.
     */
    public boolean contains(E element) {
        return contains(element.toString().getBytes(charset));
    }

    /**
     * Returns true if the array of bytes could have been inserted into the Bloom filter.
     * Use getFalsePositiveProbability() to calculate the probability of this
     * being correct.
     *
     * @param bytes array of bytes to check.
     * @return true if the array could have been inserted into the Bloom filter.
     */
    public boolean contains(byte[] bytes) {
        int[] hashes = createHashes(bytes, k);
        for (int hash : hashes) {
            if (!bitset.get(Math.abs(hash % bitSetSize))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if all the elements of a Collection could have been inserted
     * into the Bloom filter. Use getFalsePositiveProbability() to calculate the
     * probability of this being correct.
     * @param c elements to check.
     * @return true if all the elements in c could have been inserted into the Bloom filter.
     */
    public boolean containsAll(Collection<? extends E> c) {
        for (E element : c)
            if (!contains(element))
                return false;
        return true;
    }

    /**
     * Read a single bit from the Bloom filter.
     * @param bit the bit to read.
     * @return true if the bit is set, false if it is not.
     */
    public boolean getBit(int bit) {
        return bitset.get(bit);
    }

    /**
     * Set a single bit in the Bloom filter.
     * @param bit is the bit to set.
     * @param value If true, the bit is set. If false, the bit is cleared.
     */
    public void setBit(int bit, boolean value) {
        bitset.set(bit, value);
    }

    /**
     * Return the bit set used to store the Bloom filter.
     * @return bit set representing the Bloom filter.
     */
    public BitSet getBitSet() {
        return bitset;
    }

    /**
     * Returns the number of bits in the Bloom filter. Use count() to retrieve
     * the number of inserted elements.
     *
     * @return the size of the bitset used by the Bloom filter.
     */
    public int size() {
        return this.bitSetSize;
    }

    /**
     * 返回添加到布隆过滤器的元素数量。
     *
     * @return 添加到布隆过滤器的元素数量。
     */
    public int count() {
        return this.numberOfAddedElements;
    }

    /**
     * 返回布隆过滤器中预期最大的值,这个只和传给构造器的值是一样的。
     * @return 布隆过滤器中预期最大的值
     */
    public int getExpectedNumberOfElements() {
        return expectedNumberOfFilterElements;
    }

    /**
     * Get expected number of bits per element when the Bloom filter is full.
     * This value is set by the constructor when the Bloom filter is created. See also getBitsPerElement().
     *
     * @return expected number of bits per element.
     */
    public double getExpectedBitsPerElement() {
        return this.bitsPerElement;
    }

    /**
     * Get actual number of bits per element based on the number of elements that have currently been inserted and the length
     * of the Bloom filter. See also getExpectedBitsPerElement().
     *
     * @return number of bits per element.
     */
    public double getBitsPerElement() {
        return this.bitSetSize / (double)numberOfAddedElements;
    }
}
/**
 * Timing driver for BloomFilter: measures add(), contains() and containsAll() over
 * {@code elementCount} random strings and prints the elapsed time and throughput of each step.
 */
public class BloomfilterBenchmark {
    static int elementCount = 1; // Number of elements to test

    /**
     * Prints the elapsed time in seconds and the resulting elements-per-second rate.
     *
     * @param start start time in milliseconds
     * @param end end time in milliseconds
     */
    public static void printStat(long start, long end) {
        double diff = (end - start) / 1000.0;
        System.out.println(diff + "s, " + (elementCount / diff) + " elements/s");
    }

    public static void main(String[] argv) {
        final Random r = new Random();

        // Elements that will be added to the filter
        List<String> existingElements = randomStrings(r, elementCount);

        // Elements that are never added; used for the negative lookups
        List<String> nonExistingElements = randomStrings(r, elementCount);

        // Empty filter built from a target false-positive probability and the expected element
        // count; this constructor derives the number of hash values itself.
        BloomFilter<String> bf = new BloomFilter<String>(0.001, elementCount);

        // Number of elements under test
        System.out.println("Testing " + elementCount + " elements");
        // Number of hash values the filter computed
        System.out.println("k is " + bf.getK());

        // Time add() for every element.
        // Figure recorded by the original author for 500,000 elements:
        // add(): 3.24s, 154320.98765432098 elements/s
        System.out.print("add(): ");
        long start_add = System.currentTimeMillis();
        for (int i = 0; i < elementCount; i++) {
            bf.add(existingElements.get(i));
        }
        long end_add = System.currentTimeMillis();
        printStat(start_add, end_add);

        // Time contains() for elements that were added.
        // Figure recorded by the original author for 500,000 elements:
        // contains(), existing: 3.181s, 157183.27569946556 elements/s
        System.out.print("contains(), existing: ");
        long start_contains = System.currentTimeMillis();
        for (int i = 0; i < elementCount; i++) {
            bf.contains(existingElements.get(i));
        }
        long end_contains = System.currentTimeMillis();
        printStat(start_contains, end_contains);

        // Check for existing elements with containsAll()
        System.out.print("containsAll(), existing: ");
        long start_containsAll = System.currentTimeMillis();
        bf.containsAll(existingElements);
        long end_containsAll = System.currentTimeMillis();
        printStat(start_containsAll, end_containsAll);

        // Check for nonexisting elements with contains()
        System.out.print("contains(), nonexisting: ");
        long start_ncontains = System.currentTimeMillis();
        for (int i = 0; i < elementCount; i++) {
            bf.contains(nonExistingElements.get(i));
        }
        long end_ncontains = System.currentTimeMillis();
        printStat(start_ncontains, end_ncontains);

        // Check for nonexisting elements with containsAll(); this returns at the first miss
        System.out.print("containsAll(), nonexisting: ");
        long start_ncontainsAll = System.currentTimeMillis();
        bf.containsAll(nonExistingElements);
        long end_ncontainsAll = System.currentTimeMillis();
        printStat(start_ncontainsAll, end_ncontainsAll);

    }

    /** Builds {@code count} strings, each decoded from 200 random bytes. */
    private static List<String> randomStrings(Random r, int count) {
        List<String> elements = new ArrayList<String>(count);
        for (int i = 0; i < count; i++) {
            byte[] b = new byte[200];
            r.nextBytes(b);
            elements.add(new String(b));
        }
        return elements;
    }
}

3、bolt

/**
 * Bolt that parses each incoming line into a LogMessage, drops lines that cannot be parsed or
 * whose type is not recognised, and forwards the rest as (type, message) tuples.
 */
public class MessageFilterBolt extends BaseBasicBolt {
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        // First tuple field carries the raw line produced by the spout.
        LogMessage parsed = LogAnalyzeHandler.parser(input.getString(0));
        boolean usable = parsed != null && LogAnalyzeHandler.isValidType(parsed.getType());
        if (usable) {
            collector.emit(new Values(parsed.getType(), parsed));
            // Gives the handler a chance to refresh its rule data; only reached for forwarded messages.
            LogAnalyzeHandler.scheduleLoad();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Output is keyed by log type so downstream bolts can group on it.
        declarer.declare(new Fields("type", "message"));
    }
}
/**
 * Terminal bolt: hands the LogMessage carried in the "message" field to LogAnalyzeHandler
 * and emits nothing.
 */
public class ProcessMessage extends BaseBasicBolt {
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        Object payload = input.getValueByField("message");
        LogAnalyzeHandler.process((LogMessage) payload);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // No output stream is declared.
    }
}

4、constant

/**
 * Numeric codes for the categories of user-behaviour logs:
 * 1 = view, 2 = click, 3 = search, 4 = buy.
 */
public class LogTypeConstant {
    /** Page-view log. */
    public static final int VIEW = 1;

    /** Click log. */
    public static final int CLICK = 2;

    /** Search log. */
    public static final int SEARCH = 3;

    /** Purchase log. */
    public static final int BUY = 4;
}

5、dao

/**
 * Holds the single pooled DataSource shared by the DAO classes.
 */
public class DataSourceUtil {
    /** Configuration name handed to ComboPooledDataSource for every pool created here. */
    private static final String CONFIG_NAME = "logAnalyze";

    private static DataSource dataSource;

    static {
        dataSource = new ComboPooledDataSource(CONFIG_NAME);
    }

    /**
     * Returns the shared DataSource, creating it if it is not set.
     *
     * @return the shared pooled DataSource
     */
    public static synchronized DataSource getDataSource() {
        if (dataSource == null) {
            // Same named configuration as the static initializer, so both paths yield the same pool settings.
            dataSource = new ComboPooledDataSource(CONFIG_NAME);
        }
        return dataSource;
    }

    /**
     * Manual smoke test: inserts one rule record through the shared DataSource.
     */
    public static void main(String[] args) {
        JdbcTemplate jdbcTemplate = new JdbcTemplate(getDataSource());
        Record record = new Record();
        record.setAppId(1);
        record.setRuleId(1);
        record.setIsEmail(1);
        record.setIsPhone(1);
        record.setIsColse(0);
        String sql = "INSERT INTO `log_monitor`.`log_monitor_rule_record` (`appId`,`ruleId`,`isEmail`,`isPhone`,`isColse`,`noticeInfo`,`updataDate`) VALUES ( ?,?,?,?,?,?,?)";
        jdbcTemplate.update(sql, record.getAppId(), record.getRuleId(), record.getIsEmail(), record.getIsPhone(), 0, record.getLine(),new Date());
    }
}
/**
 * JDBC access to the log_analyze tables: loads job definitions and their conditions, and
 * stores and aggregates PV/UV records.
 */
public class LogAnalyzeDao {
    private static Logger logger = Logger.getLogger(LogAnalyzeDao.class);
    private JdbcTemplate jdbcTemplate;

    public LogAnalyzeDao() {
        jdbcTemplate = new JdbcTemplate(DataSourceUtil.getDataSource());
    }

    /**
     * Loads the jobs whose status is 1.
     *
     * @return jobs with jobId, jobName and jobType populated
     */
    public List<LogAnalyzeJob> loadJobList() {
        String sql = "SELECT `jobId`,`jobName`,`jobType` FROM `log_analyze`.`log_analyze_job` WHERE STATUS= 1";
        return jdbcTemplate.query(sql, new BeanPropertyRowMapper<LogAnalyzeJob>(LogAnalyzeJob.class));
    }

    /**
     * Loads the conditions of every job whose status is 1. Because of the LEFT JOIN, a job
     * without conditions still produces one row.
     *
     * @return one entry per joined row
     */
    public List<LogAnalyzeJobDetail> loadJobDetailList() {
        String sql = "SELECT condi.`jobId`,condi.`field`,condi.`value`,condi.`compare` " +
                " FROM `log_analyze`.`log_analyze_job` AS job " +
                " LEFT JOIN `log_analyze`.`log_analyze_job_condition` AS condi  " +
                " ON job.`jobId` = condi.`jobId` " +
                " WHERE job.`status` =1";
        return jdbcTemplate.query(sql, new BeanPropertyRowMapper<LogAnalyzeJobDetail>(LogAnalyzeJobDetail.class));
    }

    /** Inserts the records into the per-minute table. */
    public int[][] saveMinuteAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_nimute_append` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /** Inserts the records into the half-hour table. */
    public int[][] saveHalfAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_half_append` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /** Inserts the records into the hourly table. */
    public int[][] saveHourAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_hour_append` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /** Inserts the records into the daily table. */
    public int[][] saveDayAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_day` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /**
     * Batch-inserts the records with the given five-parameter INSERT statement. The whole list
     * is sent as one batch, and both time columns of a row are set to the insertion time.
     *
     * @param appendDataList records to insert
     * @param sql INSERT statement with parameters (indexName, pv, uv, executeTime, createTime)
     * @return update counts as returned by JdbcTemplate.batchUpdate
     */
    public int[][] saveAppendRecord(List<BaseRecord> appendDataList, String sql) {
        return jdbcTemplate.batchUpdate(sql, appendDataList, appendDataList.size(), new ParameterizedPreparedStatementSetter<BaseRecord>() {
            @Override
            public void setValues(PreparedStatement ps, BaseRecord argument) throws SQLException {
                // One instant for both columns so executeTime and createTime of a row cannot differ.
                Timestamp now = new Timestamp(System.currentTimeMillis());
                ps.setString(1, argument.getIndexName());
                ps.setInt(2, argument.getPv());
                ps.setLong(3, argument.getUv());
                ps.setTimestamp(4, now);
                ps.setTimestamp(5, now);
            }
        });
    }

    /**
     * Sums pv and uv per indexName over the per-minute rows whose executeTime lies between the
     * two bounds (inclusive, per SQL BETWEEN).
     *
     * @param startTime lower bound, as text the database can compare with executeTime
     * @param endTime upper bound, as text the database can compare with executeTime
     * @return one aggregated record per indexName
     */
    public List<BaseRecord> sumRecordValue(String startTime, String endTime) {
        // Bounds are bound as parameters rather than concatenated, so they cannot alter the statement.
        String sql = "SELECT indexName,SUM(pv) AS pv,SUM(uv) AS uv FROM `log_analyze_job_nimute_append` " +
                " WHERE  executeTime BETWEEN ? AND ? " +
                " GROUP BY indexName";
        return jdbcTemplate.query(sql, new BeanPropertyRowMapper<BaseRecord>(BaseRecord.class), startTime, endTime);
    }

}

6、domain

/**
 * An analysis job definition. Exposed as a bean so that row mappers can populate it and the
 * handler can read its id, name and type.
 */
public class LogAnalyzeJob {
    private String jobId ;
    private String jobName;
    private int jobType; // 1: view log, 2: click log, 3: search log, 4: buy log
    private int bussinessId;
    private int status;

    public String getJobId() {
        return jobId;
    }

    public void setJobId(String jobId) {
        this.jobId = jobId;
    }

    public String getJobName() {
        return jobName;
    }

    public void setJobName(String jobName) {
        this.jobName = jobName;
    }

    public int getJobType() {
        return jobType;
    }

    public void setJobType(int jobType) {
        this.jobType = jobType;
    }

    public int getBussinessId() {
        return bussinessId;
    }

    public void setBussinessId(int bussinessId) {
        this.bussinessId = bussinessId;
    }

    public int getStatus() {
        return status;
    }

    public void setStatus(int status) {
        this.status = status;
    }
}
/**
 * One matching condition of an analysis job: which log field to inspect, the value to compare
 * against, and the comparison code. Exposed as a bean so that row mappers can populate it.
 */
public class LogAnalyzeJobDetail {
    private int id;
    private int jobId;
    private String field;
    private String value;
    private int compare;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public int getJobId() {
        return jobId;
    }

    public void setJobId(int jobId) {
        this.jobId = jobId;
    }

    public String getField() {
        return field;
    }

    public void setField(String field) {
        this.field = field;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    public int getCompare() {
        return compare;
    }

    public void setCompare(int compare) {
        this.compare = compare;
    }
}
/**
 * One click-stream log entry.
 *
 * NOTE(review): LogAnalyzeHandler also calls getCompareFieldValue(String) on this class. That
 * method is not part of this listing and its field-name mapping is not visible here, so it is
 * not reconstructed; restore it from the original project.
 */
public class LogMessage implements Serializable {
    private static final long serialVersionUID = 7270840760720823716L;
    private int type; // 1: view log, 2: click log, 3: search log, 4: buy log
    private String hrefTag; // tag identifier
    private String hrefContent; // content associated with the tag, mainly the text following an <a> tag
    private String referrerUrl; // referrer URL
    private String requestUrl; // requested URL
    private String clickTime; // time of the click
    private String appName; // browser type
    private String appVersion; // browser version
    private String language; // browser language
    private String platform; // operating system
    private String screen; // screen size
    private String coordinate; // mouse coordinates at the time of the click
    private String systemId; // id of the system that produced the click stream
    private String userName; // user name

    public int getType() {
        return type;
    }

    public void setType(int type) {
        this.type = type;
    }

    public String getHrefTag() {
        return hrefTag;
    }

    public void setHrefTag(String hrefTag) {
        this.hrefTag = hrefTag;
    }

    public String getHrefContent() {
        return hrefContent;
    }

    public void setHrefContent(String hrefContent) {
        this.hrefContent = hrefContent;
    }

    public String getReferrerUrl() {
        return referrerUrl;
    }

    public void setReferrerUrl(String referrerUrl) {
        this.referrerUrl = referrerUrl;
    }

    public String getRequestUrl() {
        return requestUrl;
    }

    public void setRequestUrl(String requestUrl) {
        this.requestUrl = requestUrl;
    }

    public String getClickTime() {
        return clickTime;
    }

    public void setClickTime(String clickTime) {
        this.clickTime = clickTime;
    }

    public String getAppName() {
        return appName;
    }

    public void setAppName(String appName) {
        this.appName = appName;
    }

    public String getAppVersion() {
        return appVersion;
    }

    public void setAppVersion(String appVersion) {
        this.appVersion = appVersion;
    }

    public String getLanguage() {
        return language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public String getPlatform() {
        return platform;
    }

    public void setPlatform(String platform) {
        this.platform = platform;
    }

    public String getScreen() {
        return screen;
    }

    public void setScreen(String screen) {
        this.screen = screen;
    }

    public String getCoordinate() {
        return coordinate;
    }

    public void setCoordinate(String coordinate) {
        this.coordinate = coordinate;
    }

    public String getSystemId() {
        return systemId;
    }

    public void setSystemId(String systemId) {
        this.systemId = systemId;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }
}

7、utils

public class LogAnalyzeHandler {
    //定时加载配置文件的标识
    private static boolean reloaded = false;
    //用来保存job信息,key为jobType,value为该类别下所有的任务。
    private static Map<String, List<LogAnalyzeJob>> jobMap;
    //用来保存job的判断条件,key为jobId,value为list,list中封装了很多判断条件。
    private static Map<String, List<LogAnalyzeJobDetail>> jobDetail;

    static {
        jobMap = loadJobMap();
        jobDetail = loadJobDetailMap();
    }

    public static LogMessage parser(String line) {
        LogMessage logMessage = new Gson().fromJson(line, LogMessage.class);
        return logMessage;
    }
    
    /**
     * pv 在redis中是string,key为:log:{jobId}:pv:{20151116},value=pv数量。
     * uv 使用java-bloomFilter计算,https://github.com/maoxiangyi/Java-BloomFilter
     *
     * @param logMessage
     */
    public static void process(LogMessage logMessage) {
        if (jobMap == null || jobDetail == null) {
            loadDataModel();
        }
        // kafka来的日志:2,req,ref,xxx,xxx,xxx,yy
        List<LogAnalyzeJob> analyzeJobList = jobMap.get(logMessage.getType()+"");
        for (LogAnalyzeJob logAnalyzeJob : analyzeJobList) {
            boolean isMatch = false; //是否匹配
            List<LogAnalyzeJobDetail> logAnalyzeJobDetailList = jobDetail.get(logAnalyzeJob.getJobId());
            for (LogAnalyzeJobDetail jobDetail : logAnalyzeJobDetailList) {
                //jobDetail,指定和kakfa输入过来的数据中的 requesturl比较
                // 获取kafka输入过来的数据的requesturl的值
                String fieldValueInLog = logMessage.getCompareFieldValue(jobDetail.getField());
                //1:包含 2:等于 3:正则
                if (jobDetail.getCompare() == 1 && fieldValueInLog.contains(jobDetail.getValue())) {
                    isMatch = true;
                } else if (jobDetail.getCompare() == 2 && fieldValueInLog.equals(jobDetail.getValue())) {
                    isMatch = true;
                } else {
                    isMatch = false;
                }
                if (!isMatch) {
                    break;
                }
            }
            if (isMatch) {
                //设置pv
                String pvKey = "log:" + logAnalyzeJob.getJobName() + ":pv:" + DateUtils.getDate();
                String uvKey = "log:" + logAnalyzeJob.getJobName() + ":uv:" + DateUtils.getDate();
                ShardedJedis jedis = MyShardedJedisPool.getShardedJedisPool().getResource();
                //给pv+1
                jedis.incr(pvKey);
                //设置uv,uv需要去重,使用set
                jedis.sadd(uvKey, logMessage.getUserName());

            }
        }
    }

    /**
     * 计算单个指标点击的数量
     */
    private static void processViewLog(LogMessage logMessage) {
        if (jobMap == null || jobDetail == null) {
            loadDataModel();
        }
        List<LogAnalyzeJob> analyzeJobList = jobMap.get(LogTypeConstant.VIEW + "");
        for (LogAnalyzeJob logAnalyzeJob : analyzeJobList) {
            boolean isMatch = false;
            List<LogAnalyzeJobDetail> logAnalyzeJobDetailList = jobDetail.get(logAnalyzeJob.getJobId());
            for (LogAnalyzeJobDetail jobDetail : logAnalyzeJobDetailList) {
                String fieldValueInLog = logMessage.getCompareFieldValue(jobDetail.getField());
                //1:包含 2:等于
                if (jobDetail.getCompare() == 1 && fieldValueInLog.contains(jobDetail.getValue())) {
                    isMatch = true;
                } else if (jobDetail.getCompare() == 2 && fieldValueInLog.equals(jobDetail.getValue())) {
                    isMatch = true;
                } else {
                    isMatch = false;
                }
                if (!isMatch) {
                    break;
                }
            }
            if (isMatch) {
                //设置pv
                String pvKey = "log:" + logAnalyzeJob.getJobName() + ":pv:" + DateUtils.getDate();
                String uvKey = "log:" + logAnalyzeJob.getJobName() + ":uv:" + DateUtils.getDate();
                ShardedJedis jedis = MyShardedJedisPool.getShardedJedisPool().getResource();
                jedis.incr(pvKey);
                //设置uv
                jedis.sadd(uvKey, logMessage.getUserName());
                //优惠策略,使用bloomFilter算法进行优化
            }
        }
    }

    private synchronized static void loadDataModel() {
        if (jobMap == null) {
            jobMap = loadJobMap();
        }
        if (jobDetail == null) {
            jobDetail = loadJobDetailMap();
        }
    }

    private static Map<String, List<LogAnalyzeJobDetail>> loadJobDetailMap() {
        Map<String, List<LogAnalyzeJobDetail>> map = new HashMap<String, List<LogAnalyzeJobDetail>>();
        List<LogAnalyzeJobDetail> logAnalyzeJobDetailList = new LogAnalyzeDao().loadJobDetailList();
        for (LogAnalyzeJobDetail logAnalyzeJobDetail : logAnalyzeJobDetailList) {
            int jobId = logAnalyzeJobDetail.getJobId();
            List<LogAnalyzeJobDetail> jobDetails = map.get(jobId + "");
            if (jobDetails == null || jobDetails.size() == 0) {
                jobDetails = new ArrayList<>();
                map.put(jobId + "", jobDetails);
            }
            jobDetails.add(logAnalyzeJobDetail);
        }
        System.out.println("jobDetailMap:  "+map);
        return map;
    }

    private static Map<String, List<LogAnalyzeJob>> loadJobMap() {
        Map<String, List<LogAnalyzeJob>> map = new HashMap<String, List<LogAnalyzeJob>>();
        List<LogAnalyzeJob> logAnalyzeJobList = new LogAnalyzeDao().loadJobList();
        System.out.println(logAnalyzeJobList);
        for (LogAnalyzeJob logAnalyzeJob : logAnalyzeJobList) {
            int jobType = logAnalyzeJob.getJobType();
            if (isValidType(jobType)) {
                List<LogAnalyzeJob> jobList = map.get(jobType+"");
                if (jobList == null || jobList.size() == 0) {
                    jobList = new ArrayList<>();
                    map.put(jobType + "", jobList);
                }
                jobList.add(logAnalyzeJob);
            }
        }
        System.out.println("job:  " + map);
        return map;
    }

    public static boolean isValidType(int jobType) {
        if (jobType == LogTypeConstant.BUY || jobType == LogTypeConstant.CLICK
                || jobType == LogTypeConstant.VIEW || jobType == LogTypeConstant.SEARCH) {
            return true;
        }
        return false;
    }

    /**
     * 配置scheduleLoad重新加载底层数据模型。
     */
    public static synchronized void reloadDataModel() {
        if (reloaded) {
            jobMap = loadJobMap();
            jobDetail = loadJobDetailMap();
            reloaded = false;
        }
    }

    /**
     * 定时加载配置信息
     * 配合reloadDataModel模块一起使用。
     * 主要实现原理如下:
     * 1,获取分钟的数据值,当分钟数据是10的倍数,就会触发reloadDataModel方法,简称reload时间。
     * 2,reloadDataModel方式是线程安全的,在当前worker中只有一个现成能够操作。
     * 3,为了保证当前线程操作完毕之后,其他线程不再重复操作,设置了一个标识符reloaded。
     * 在非reload时间段时,reloaded一直被置为true;
     * 在reload时间段时,第一个线程进入reloadDataModel后,加载完毕之后会将reloaded置为false。
     */
    public static void scheduleLoad() {
        String date = DateUtils.getDateTime();
        int now = Integer.parseInt(date.split(":")[1]);
        if (now % 10 == 0) {//每10分钟加载一次
            reloadDataModel();
        } else {
            reloaded = true;
        }
    }
    
}
/**
 * Holds the single ShardedJedisPool used by the application, built once at class initialization.
 */
public class MyShardedJedisPool {

    private static ShardedJedisPool shardedJedisPool;

    // Build the pool once, from the configuration and shard list below.
    static {
        shardedJedisPool = new ShardedJedisPool(poolConfig(), shardList());
    }

    /** Pool settings (setter names follow the "maxTotal" / "maxWaitMillis" naming). */
    private static JedisPoolConfig poolConfig() {
        JedisPoolConfig settings = new JedisPoolConfig();
        // Upper bound on idle instances kept in the pool.
        settings.setMaxIdle(5);
        // Upper bound on instances handed out via getResource(); -1 means no limit.
        settings.setMaxTotal(-1);
        // Maximum wait when borrowing an instance (per the setter name, in milliseconds).
        settings.setMaxWaitMillis(5);
        // Validate instances when they are borrowed and when they are returned.
        settings.setTestOnBorrow(true);
        settings.setTestOnReturn(true);
        return settings;
    }

    /** Shard list; it currently contains the single local Redis server. */
    private static List<JedisShardInfo> shardList() {
        List<JedisShardInfo> shards = new LinkedList<JedisShardInfo>();
        shards.add(new JedisShardInfo("127.0.0.1", 6379));
        return shards;
    }

    public static ShardedJedisPool getShardedJedisPool() {
        return shardedJedisPool;
    }

    /** Manual smoke test: writes four sample keys through the pool. */
    public static void main(String[] args) {
        ShardedJedis client = getShardedJedisPool().getResource();
        client.set("1","maoxiangyi");
        client.set("2","itcast");
        client.set("3","传智播客");
        client.set("4","java学院");
    }
}

8、Storm驱动类

/**
 * Entry point that wires the spout and the two bolts into a topology and submits it: to the
 * cluster under the name given as the first argument, or to an in-process LocalCluster when no
 * argument is passed.
 */
public class LogAnalyzeTopologyMain {
    public static void main(String[] args) throws  Exception{
        TopologyBuilder topology = new TopologyBuilder();
        topology.setSpout("kafka-spout", new RandomSpout(), 2);
        topology.setBolt("MessageFilter-bolt", new MessageFilterBolt(), 3)
                .shuffleGrouping("kafka-spout");
        // Grouping on "type" sends all messages of one log type to the same task.
        topology.setBolt("ProcessMessage-bolt", new ProcessMessage(), 2)
                .fieldsGrouping("MessageFilter-bolt", new Fields("type"));

        Config conf = new Config();
        boolean localMode = args == null || args.length == 0;
        if (localMode) {
            conf.setMaxTaskParallelism(3);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("LogAnalyzeTopologyMain", conf, topology.createTopology());
            // Keep the local cluster running for a while before shutting it down.
            Utils.sleep(10000000);
            cluster.shutdown();
            return;
        }
        conf.setNumWorkers(2);
        StormSubmitter.submitTopologyWithProgressBar(args[0], conf, topology.createTopology());
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值