1、Storm程序整体目录

2、bloom
/**
 * A Bloom filter: a space-efficient probabilistic set-membership structure.
 * {@link #contains} may return false positives but never false negatives.
 * Based on the open-source project https://github.com/maoxiangyi/Java-BloomFilter
 *
 * @param <E> element type; elements are hashed through their toString() bytes
 */
public class BloomFilter<E> implements Serializable {
    private BitSet bitset;                      // the underlying bit array
    private int bitSetSize;                     // total number of bits in the filter
    private double bitsPerElement;              // expected number of bits consumed per element
    private int expectedNumberOfFilterElements; // maximum number of elements the filter is sized for
    private int numberOfAddedElements;          // number of elements actually added so far
    private int k;                              // number of hash functions applied per element

    /** Charset used to encode strings before hashing. */
    static final Charset charset = Charset.forName("UTF-8");
    /** Digest algorithm used to derive the hash values; MD5 gives adequate dispersion here. */
    static final String hashName = "MD5";
    static final MessageDigest digestFunction;

    static { // The digest instance is shared between all filter instances.
        MessageDigest tmp;
        try {
            tmp = MessageDigest.getInstance(hashName);
        } catch (NoSuchAlgorithmException e) {
            // MD5 is required by the JDK specification, so this should never trigger.
            tmp = null;
        }
        digestFunction = tmp;
    }

    /**
     * Constructs an empty Bloom filter whose total size is c * n bits.
     *
     * @param c number of bits used per element
     * @param n expected (maximum) number of elements the filter will contain
     * @param k number of hash functions to apply per element
     */
    public BloomFilter(double c, int n, int k) {
        this.expectedNumberOfFilterElements = n;
        this.k = k;
        this.bitsPerElement = c;
        this.bitSetSize = (int) Math.ceil(c * n);
        this.numberOfAddedElements = 0;
        this.bitset = new BitSet(bitSetSize);
    }

    /**
     * Constructs an empty Bloom filter. The optimal number of hash functions
     * (k = m/n * ln 2) is derived from the total size and the expected number
     * of elements.
     *
     * @param bitSetSize              total number of bits used by the filter
     * @param expectedNumberOElements expected (maximum) number of stored elements
     */
    public BloomFilter(int bitSetSize, int expectedNumberOElements) {
        this(bitSetSize / (double) expectedNumberOElements,
                expectedNumberOElements,
                (int) Math.round((bitSetSize / (double) expectedNumberOElements) * Math.log(2.0)));
    }

    /**
     * Constructs an empty Bloom filter with a target false-positive rate.
     * The bits-per-element and the number of hash functions are derived from it.
     *
     * @param falsePositiveProbability desired false-positive probability
     * @param expectedNumberOfElements expected (maximum) number of stored elements
     */
    public BloomFilter(double falsePositiveProbability, int expectedNumberOfElements) {
        this(Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2))) / Math.log(2), // c = k / ln(2)
                expectedNumberOfElements,
                (int) Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2)))); // k = ceil(-log_2(false prob.))
    }

    /**
     * Constructs a Bloom filter on top of an existing bit set.
     *
     * @param bitSetSize                     total number of bits used by the filter
     * @param expectedNumberOfFilterElements expected (maximum) number of stored elements
     * @param actualNumberOfFilterElements   number of elements already represented in filterData
     * @param filterData                     existing bit set to adopt (not copied)
     */
    public BloomFilter(int bitSetSize, int expectedNumberOfFilterElements, int actualNumberOfFilterElements, BitSet filterData) {
        this(bitSetSize, expectedNumberOfFilterElements);
        this.bitset = filterData;
        this.numberOfAddedElements = actualNumberOfFilterElements;
    }

    /**
     * Generates a digest based on the contents of a String.
     *
     * @param val     input data
     * @param charset encoding used to convert the string to bytes
     * @return digest as int
     */
    public static int createHash(String val, Charset charset) {
        return createHash(val.getBytes(charset));
    }

    /**
     * Generates a digest based on the contents of a String, encoded as UTF-8.
     *
     * @param val input data
     * @return digest as int
     */
    public static int createHash(String val) {
        return createHash(val, charset);
    }

    /**
     * Generates a digest based on the contents of an array of bytes.
     *
     * @param data input data
     * @return digest as int
     */
    public static int createHash(byte[] data) {
        return createHashes(data, 1)[0];
    }

    /**
     * Produces the requested number of int hash values for the given data.
     * The digest function is called (with an incrementing one-byte salt
     * prepended to the data) until enough 4-byte chunks are available; each
     * chunk is packed big-endian into one int.
     *
     * @param data   input data
     * @param hashes number of hash values to produce
     * @return array of hash values, one per hash function
     */
    public static int[] createHashes(byte[] data, int hashes) {
        int[] result = new int[hashes];
        int k = 0;
        byte salt = 0;
        while (k < hashes) {
            byte[] digest;
            synchronized (digestFunction) { // the shared digest is stateful, so serialize access
                digestFunction.update(salt);
                salt++;
                digest = digestFunction.digest(data);
            }
            // Pack each 4-byte chunk of the digest into one int.
            for (int i = 0; i < digest.length / 4 && k < hashes; i++) {
                int h = 0;
                for (int j = (i * 4); j < (i * 4) + 4; j++) {
                    h <<= 8;
                    h |= ((int) digest[j]) & 0xFF;
                }
                result[k] = h;
                k++;
            }
        }
        // Leftover debug println of the raw array reference removed here.
        return result;
    }

    /**
     * Compares the contents of two instances to see if they are equal.
     *
     * @param obj is the object to compare to.
     * @return True if the contents of the objects are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        @SuppressWarnings("unchecked") // guarded by the getClass() check above
        final BloomFilter<E> other = (BloomFilter<E>) obj;
        if (this.expectedNumberOfFilterElements != other.expectedNumberOfFilterElements) {
            return false;
        }
        if (this.k != other.k) {
            return false;
        }
        if (this.bitSetSize != other.bitSetSize) {
            return false;
        }
        // Bit sets are equal if same instance, both null, or equal contents.
        if (this.bitset != other.bitset && (this.bitset == null || !this.bitset.equals(other.bitset))) {
            return false;
        }
        return true;
    }

    /**
     * Calculates a hash code for this class.
     * @return hash code representing the contents of an instance of this class.
     */
    @Override
    public int hashCode() {
        int hash = 7;
        hash = 61 * hash + (this.bitset != null ? this.bitset.hashCode() : 0);
        hash = 61 * hash + this.expectedNumberOfFilterElements;
        hash = 61 * hash + this.bitSetSize;
        hash = 61 * hash + this.k;
        return hash;
    }

    /**
     * Calculates the expected probability of false positives based on
     * the number of expected filter elements and the size of the Bloom filter.
     * <br /><br />
     * The value returned by this method is the <i>expected</i> rate of false
     * positives, assuming the number of inserted elements equals the number of
     * expected elements. If the number of elements in the Bloom filter is less
     * than the expected value, the true probability of false positives will be lower.
     *
     * @return expected probability of false positives.
     */
    public double expectedFalsePositiveProbability() {
        return getFalsePositiveProbability(expectedNumberOfFilterElements);
    }

    /**
     * Calculate the probability of a false positive given the specified
     * number of inserted elements.
     *
     * @param numberOfElements number of inserted elements.
     * @return probability of a false positive.
     */
    public double getFalsePositiveProbability(double numberOfElements) {
        // (1 - e^(-k * n / m)) ^ k
        return Math.pow((1 - Math.exp(-k * (double) numberOfElements
                / (double) bitSetSize)), k);
    }

    /**
     * Get the current probability of a false positive. The probability is calculated from
     * the size of the Bloom filter and the current number of elements added to it.
     *
     * @return probability of false positives.
     */
    public double getFalsePositiveProbability() {
        return getFalsePositiveProbability(numberOfAddedElements);
    }

    /**
     * Returns the value chosen for K.<br />
     * <br />
     * K is the optimal number of hash functions based on the size
     * of the Bloom filter and the expected number of inserted elements.
     *
     * @return optimal k.
     */
    public int getK() {
        return k;
    }

    /**
     * Sets all bits to false in the Bloom filter.
     */
    public void clear() {
        bitset.clear();
        numberOfAddedElements = 0;
    }

    /**
     * Adds an object to the Bloom filter. The output from the object's
     * toString() method is used as input to the hash functions.
     *
     * @param element is an element to register in the Bloom filter.
     */
    public void add(E element) {
        add(element.toString().getBytes(charset));
    }

    /**
     * Adds an array of bytes to the Bloom filter.
     *
     * @param bytes array of bytes to add to the Bloom filter.
     */
    public void add(byte[] bytes) {
        int[] hashes = createHashes(bytes, k);
        for (int hash : hashes) {
            // Math.abs maps the signed hash into the bit-set index range.
            bitset.set(Math.abs(hash % bitSetSize), true);
        }
        numberOfAddedElements++;
    }

    /**
     * Adds all elements from a Collection to the Bloom filter.
     * @param c Collection of elements.
     */
    public void addAll(Collection<? extends E> c) {
        for (E element : c) {
            add(element);
        }
    }

    /**
     * Returns true if the element could have been inserted into the Bloom filter.
     * Use getFalsePositiveProbability() to calculate the probability of this
     * being correct.
     *
     * @param element element to check.
     * @return true if the element could have been inserted into the Bloom filter.
     */
    public boolean contains(E element) {
        return contains(element.toString().getBytes(charset));
    }

    /**
     * Returns true if the array of bytes could have been inserted into the Bloom filter.
     * Use getFalsePositiveProbability() to calculate the probability of this
     * being correct.
     *
     * @param bytes array of bytes to check.
     * @return true if the array could have been inserted into the Bloom filter.
     */
    public boolean contains(byte[] bytes) {
        int[] hashes = createHashes(bytes, k);
        for (int hash : hashes) {
            if (!bitset.get(Math.abs(hash % bitSetSize))) {
                return false; // a single unset bit proves the element was never added
            }
        }
        return true;
    }

    /**
     * Returns true if all the elements of a Collection could have been inserted
     * into the Bloom filter. Use getFalsePositiveProbability() to calculate the
     * probability of this being correct.
     * @param c elements to check.
     * @return true if all the elements in c could have been inserted into the Bloom filter.
     */
    public boolean containsAll(Collection<? extends E> c) {
        for (E element : c) {
            if (!contains(element)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Read a single bit from the Bloom filter.
     * @param bit the bit to read.
     * @return true if the bit is set, false if it is not.
     */
    public boolean getBit(int bit) {
        return bitset.get(bit);
    }

    /**
     * Set a single bit in the Bloom filter.
     * @param bit is the bit to set.
     * @param value If true, the bit is set. If false, the bit is cleared.
     */
    public void setBit(int bit, boolean value) {
        bitset.set(bit, value);
    }

    /**
     * Return the bit set used to store the Bloom filter.
     * @return bit set representing the Bloom filter.
     */
    public BitSet getBitSet() {
        return bitset;
    }

    /**
     * Returns the number of bits in the Bloom filter. Use count() to retrieve
     * the number of inserted elements.
     *
     * @return the size of the bitset used by the Bloom filter.
     */
    public int size() {
        return this.bitSetSize;
    }

    /**
     * Returns the number of elements added to the Bloom filter.
     *
     * @return number of elements added to the Bloom filter.
     */
    public int count() {
        return this.numberOfAddedElements;
    }

    /**
     * Returns the expected maximum number of elements, i.e. the value passed
     * to the constructor.
     *
     * @return expected maximum number of elements.
     */
    public int getExpectedNumberOfElements() {
        return expectedNumberOfFilterElements;
    }

    /**
     * Get expected number of bits per element when the Bloom filter is full.
     * This value is set by the constructor when the Bloom filter is created. See also getBitsPerElement().
     *
     * @return expected number of bits per element.
     */
    public double getExpectedBitsPerElement() {
        return this.bitsPerElement;
    }

    /**
     * Get actual number of bits per element based on the number of elements that have currently been inserted and the length
     * of the Bloom filter. See also getExpectedBitsPerElement().
     *
     * @return number of bits per element.
     */
    public double getBitsPerElement() {
        return this.bitSetSize / (double) numberOfAddedElements;
    }
}
/**
 * Micro-benchmark for BloomFilter: measures throughput of add(),
 * contains() and containsAll() for both present and absent elements.
 */
public class BloomfilterBenchmark {
    static int elementCount = 1; // number of elements per benchmark phase

    /**
     * Prints the elapsed time of one phase and the resulting throughput.
     *
     * @param start phase start time in milliseconds
     * @param end   phase end time in milliseconds
     */
    public static void printStat(long start, long end) {
        double diff = (end - start) / 1000.0;
        // Guard against printing "Infinity elements/s" when the phase took < 1 ms.
        if (diff > 0) {
            System.out.println(diff + "s, " + (elementCount / diff) + " elements/s");
        } else {
            System.out.println(diff + "s, too fast to measure");
        }
    }

    public static void main(String[] argv) {
        final Random r = new Random();
        // Elements that WILL be added to the filter.
        List<String> existingElements = new ArrayList<String>(elementCount);
        for (int i = 0; i < elementCount; i++) {
            byte[] b = new byte[200];
            r.nextBytes(b);
            existingElements.add(new String(b));
        }
        // Elements that will NOT be added, used to probe for false positives.
        List<String> nonExistingElements = new ArrayList<String>(elementCount);
        for (int i = 0; i < elementCount; i++) {
            byte[] b = new byte[200];
            r.nextBytes(b);
            nonExistingElements.add(new String(b));
        }
        // Build an empty filter from a target false-positive rate and the
        // expected element count; the optimal k is derived automatically.
        BloomFilter<String> bf = new BloomFilter<String>(0.001, elementCount);
        System.out.println("Testing " + elementCount + " elements");
        System.out.println("k is " + bf.getK());

        // Benchmark add().
        System.out.print("add(): ");
        long startAdd = System.currentTimeMillis();
        for (int i = 0; i < elementCount; i++) {
            bf.add(existingElements.get(i));
        }
        long endAdd = System.currentTimeMillis();
        printStat(startAdd, endAdd);

        // Benchmark contains() on elements known to be present.
        System.out.print("contains(), existing: ");
        long startContains = System.currentTimeMillis();
        for (int i = 0; i < elementCount; i++) {
            bf.contains(existingElements.get(i));
        }
        long endContains = System.currentTimeMillis();
        printStat(startContains, endContains);

        // Benchmark containsAll() on present elements.
        // (The original loop mistakenly called contains() in this phase.)
        System.out.print("containsAll(), existing: ");
        long startContainsAll = System.currentTimeMillis();
        bf.containsAll(existingElements);
        long endContainsAll = System.currentTimeMillis();
        printStat(startContainsAll, endContainsAll);

        // Benchmark contains() on elements known to be absent.
        System.out.print("contains(), nonexisting: ");
        long startNContains = System.currentTimeMillis();
        for (int i = 0; i < elementCount; i++) {
            bf.contains(nonExistingElements.get(i));
        }
        long endNContains = System.currentTimeMillis();
        printStat(startNContains, endNContains);

        // Benchmark containsAll() on absent elements.
        System.out.print("containsAll(), nonexisting: ");
        long startNContainsAll = System.currentTimeMillis();
        bf.containsAll(nonExistingElements);
        long endNContainsAll = System.currentTimeMillis();
        printStat(startNContainsAll, endNContainsAll);
    }
}
3、bolt
/**
 * Filters raw log lines coming from the Kafka spout: parses each line into a
 * LogMessage and drops records that fail to parse or carry an unsupported
 * log type, then emits (type, message) tuples for downstream processing.
 */
public class MessageFilterBolt extends BaseBasicBolt {
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
// Raw log line emitted by the KafkaSpout (first tuple field).
String line = input.getString(0);
// Parse the JSON line into a LogMessage.
LogMessage logMessage = LogAnalyzeHandler.parser(line);
// Drop unparseable lines and unsupported log types.
if (logMessage == null || !LogAnalyzeHandler.isValidType(logMessage.getType())) {
return;
}
collector.emit(new Values(logMessage.getType(), logMessage));
// Periodically reload the job/rule configuration (see LogAnalyzeHandler.scheduleLoad).
LogAnalyzeHandler.scheduleLoad();
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// Tuples are routed downstream by log type via the "type" field.
declarer.declare(new Fields("type", "message"));
}
}
/**
 * Terminal bolt: takes the filtered LogMessage tuples and delegates the
 * pv/uv accounting to LogAnalyzeHandler.
 */
public class ProcessMessage extends BaseBasicBolt {
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
// The "message" field carries the LogMessage emitted by MessageFilterBolt.
LogMessage logMessage = (LogMessage) input.getValueByField("message");
LogAnalyzeHandler.process(logMessage);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// Terminal bolt: emits nothing downstream.
}
}
4、constant
/**
 * Log-type constants for the four tracked categories of user-behaviour logs:
 * 1 = page view, 2 = click, 3 = search, 4 = purchase.
 */
public class LogTypeConstant {
    /** Page-view log entries. */
    public static final int VIEW = 1;
    /** Click log entries. */
    public static final int CLICK = 2;
    /** Search log entries. */
    public static final int SEARCH = 3;
    /** Purchase log entries. */
    public static final int BUY = 4;

    /** Constants holder; not instantiable. */
    private LogTypeConstant() {
    }
}
5、dao
/**
 * Holder for the shared c3p0 DataSource used by the DAO layer.
 */
public class DataSourceUtil {
private static DataSource dataSource;
static {
// Pool settings are read from the c3p0 named configuration "logAnalyze".
dataSource = new ComboPooledDataSource("logAnalyze");
}
public static synchronized DataSource getDataSource() {
// NOTE(review): the static block above always assigns dataSource, so this
// null check only fires if that initialization failed; the fallback pool
// would then use c3p0 defaults instead of the "logAnalyze" configuration.
if (dataSource == null) {
dataSource = new ComboPooledDataSource();
}
return dataSource;
}
// Ad-hoc smoke test: inserts one monitoring-rule record into log_monitor.
public static void main(String[] args) {
JdbcTemplate jdbcTemplate = new JdbcTemplate(dataSource);
Record record = new Record();
record.setAppId(1);
record.setRuleId(1);
record.setIsEmail(1);
record.setIsPhone(1);
record.setIsColse(0);
String sql = "INSERT INTO `log_monitor`.`log_monitor_rule_record` (`appId`,`ruleId`,`isEmail`,`isPhone`,`isColse`,`noticeInfo`,`updataDate`) VALUES ( ?,?,?,?,?,?,?)";
jdbcTemplate.update(sql, record.getAppId(), record.getRuleId(), record.getIsEmail(), record.getIsPhone(), 0, record.getLine(),new Date());
}
}
/**
 * Data-access object for log-analysis jobs, their match conditions, and the
 * aggregated pv/uv append tables.
 */
public class LogAnalyzeDao {
    private static Logger logger = Logger.getLogger(LogAnalyzeDao.class);
    private JdbcTemplate jdbcTemplate;

    public LogAnalyzeDao() {
        jdbcTemplate = new JdbcTemplate(DataSourceUtil.getDataSource());
    }

    /** Loads all enabled analysis jobs (status = 1). */
    public List<LogAnalyzeJob> loadJobList() {
        String sql = "SELECT `jobId`,`jobName`,`jobType` FROM `log_analyze`.`log_analyze_job` WHERE STATUS= 1";
        return jdbcTemplate.query(sql, new BeanPropertyRowMapper<LogAnalyzeJob>(LogAnalyzeJob.class));
    }

    /** Loads the match conditions of all enabled jobs. */
    public List<LogAnalyzeJobDetail> loadJobDetailList() {
        String sql = "SELECT condi.`jobId`,condi.`field`,condi.`value`,condi.`compare` " +
                " FROM `log_analyze`.`log_analyze_job` AS job " +
                " LEFT JOIN `log_analyze`.`log_analyze_job_condition` AS condi " +
                " ON job.`jobId` = condi.`jobId` " +
                " WHERE job.`status` =1";
        return jdbcTemplate.query(sql, new BeanPropertyRowMapper<LogAnalyzeJobDetail>(LogAnalyzeJobDetail.class));
    }

    /** Batch-inserts per-minute pv/uv records. */
    public int[][] saveMinuteAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_nimute_append` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /** Batch-inserts half-hour pv/uv records. */
    public int[][] saveHalfAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_half_append` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /** Batch-inserts hourly pv/uv records. */
    public int[][] saveHourAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_hour_append` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /** Batch-inserts daily pv/uv records. */
    public int[][] saveDayAppendRecord(List<BaseRecord> appendDataList) {
        String sql = "INSERT INTO `log_analyze`.`log_analyze_job_day` (`indexName`,`pv`,`uv`,`executeTime`,`createTime` ) VALUES (?,?,?,?,?)";
        return saveAppendRecord(appendDataList, sql);
    }

    /**
     * Batch-inserts a list of records with the given INSERT statement.
     * executeTime and createTime are both set to the current time.
     *
     * @param appendDataList records to persist
     * @param sql            5-parameter INSERT statement
     * @return per-batch update counts from JdbcTemplate.batchUpdate
     */
    public int[][] saveAppendRecord(List<BaseRecord> appendDataList, String sql) {
        return jdbcTemplate.batchUpdate(sql, appendDataList, appendDataList.size(), new ParameterizedPreparedStatementSetter<BaseRecord>() {
            @Override
            public void setValues(PreparedStatement ps, BaseRecord argument) throws SQLException {
                ps.setString(1, argument.getIndexName());
                ps.setInt(2, argument.getPv());
                ps.setLong(3, argument.getUv());
                // Use one timestamp so executeTime and createTime agree exactly.
                Timestamp now = new Timestamp(System.currentTimeMillis());
                ps.setTimestamp(4, now);
                ps.setTimestamp(5, now);
            }
        });
    }

    /**
     * Sums pv/uv per index name over the [startTime, endTime] window.
     * Uses bind parameters instead of string concatenation so the time
     * bounds cannot be used for SQL injection.
     *
     * @param startTime window start (inclusive)
     * @param endTime   window end (inclusive)
     * @return one BaseRecord per index name with summed pv/uv
     */
    public List<BaseRecord> sumRecordValue(String startTime, String endTime) {
        String sql = "SELECT indexName,SUM(pv) AS pv,SUM(uv) AS uv FROM `log_analyze_job_nimute_append` " +
                " WHERE executeTime BETWEEN ? AND ? " +
                " GROUP BY indexName";
        return jdbcTemplate.query(sql, new BeanPropertyRowMapper<BaseRecord>(BaseRecord.class), startTime, endTime);
    }
}
6、domain
/**
 * One analysis-job row from log_analyze.log_analyze_job.
 * Populated by BeanPropertyRowMapper, so field names must match column names;
 * accessors (getJobId etc.) are assumed to be defined elsewhere — TODO confirm.
 */
public class LogAnalyzeJob {
private String jobId ;
private String jobName;
private int jobType; // 1: view log, 2: click log, 3: search log, 4: purchase log (see LogTypeConstant)
private int bussinessId;
private int status;
}
/**
 * One match condition of an analysis job, from
 * log_analyze.log_analyze_job_condition (mapped via BeanPropertyRowMapper,
 * so field names must match column names).
 */
public class LogAnalyzeJobDetail {
private int id;
private int jobId;
private String field;   // name of the LogMessage field this condition inspects
private String value;   // value to compare the log field against
private int compare;    // comparison operator: 1 = contains, 2 = equals (3 = regex per LogAnalyzeHandler)
}
/**
 * One user-behaviour log event, deserialized from a JSON line by Gson
 * (field names must match the JSON keys). Serializable because instances
 * travel through Storm tuples.
 */
public class LogMessage implements Serializable {
private static final long serialVersionUID = 7270840760720823716L;
private int type;// 1: view log, 2: click log, 3: search log, 4: purchase log
private String hrefTag;// identifier of the clicked tag
private String hrefContent;// content of the tag, mainly the text of an <a> element
private String referrerUrl;// referrer URL
private String requestUrl;// requested page URL (the original comment duplicated the referrer note)
private String clickTime;// click timestamp
private String appName;// browser type
private String appVersion;// browser version
private String language;// browser language
private String platform;// operating system
private String screen;// screen size
private String coordinate;// mouse coordinates at click time
private String systemId; // id of the system that produced the clickstream
private String userName;// user name
}
7、utils
/**
 * Core log-analysis logic: parses raw JSON log lines, matches them against
 * the configured jobs and their conditions, and accumulates pv/uv counters
 * in Redis. Configuration is loaded from the database at class load and
 * periodically refreshed via scheduleLoad()/reloadDataModel().
 */
public class LogAnalyzeHandler {
// Flag coordinating scheduleLoad()/reloadDataModel() so configuration is
// reloaded at most once per reload window.
private static boolean reloaded = false;
// Job info keyed by jobType (as String); value is every job of that type.
private static Map<String, List<LogAnalyzeJob>> jobMap;
// Match conditions keyed by jobId (as String); value is that job's condition list.
private static Map<String, List<LogAnalyzeJobDetail>> jobDetail;
static {
jobMap = loadJobMap();
jobDetail = loadJobDetailMap();
}
/**
 * Parses one JSON log line into a LogMessage via Gson.
 */
public static LogMessage parser(String line) {
LogMessage logMessage = new Gson().fromJson(line, LogMessage.class);
return logMessage;
}
/**
 * Matches the message against every job of its type and, on a full match,
 * increments the job's pv counter and adds the user to the uv set in Redis.
 *
 * pv is a Redis string: key log:{jobName}:pv:{date}, value = pv count.
 * uv is de-duplicated via a Redis set: key log:{jobName}:uv:{date}.
 *
 * @param logMessage parsed log message
 */
public static void process(LogMessage logMessage) {
if (jobMap == null || jobDetail == null) {
loadDataModel();
}
// NOTE(review): if no job is configured for this type, jobMap.get(...)
// returns null and the for loop below throws NullPointerException —
// verify upstream filtering (MessageFilterBolt.isValidType) is sufficient.
List<LogAnalyzeJob> analyzeJobList = jobMap.get(logMessage.getType()+"");
for (LogAnalyzeJob logAnalyzeJob : analyzeJobList) {
boolean isMatch = false; // true only while every condition so far matched
List<LogAnalyzeJobDetail> logAnalyzeJobDetailList = jobDetail.get(logAnalyzeJob.getJobId());
// NOTE(review): the loop variable below shadows the static field jobDetail.
for (LogAnalyzeJobDetail jobDetail : logAnalyzeJobDetailList) {
// Value of the log field this condition inspects (e.g. requestUrl).
String fieldValueInLog = logMessage.getCompareFieldValue(jobDetail.getField());
// compare semantics: 1 = contains, 2 = equals (3 = regex, not implemented here)
if (jobDetail.getCompare() == 1 && fieldValueInLog.contains(jobDetail.getValue())) {
isMatch = true;
} else if (jobDetail.getCompare() == 2 && fieldValueInLog.equals(jobDetail.getValue())) {
isMatch = true;
} else {
isMatch = false;
}
if (!isMatch) {
break; // all conditions must match; stop at the first failure
}
}
if (isMatch) {
// pv/uv keys are scoped per job and per day.
String pvKey = "log:" + logAnalyzeJob.getJobName() + ":pv:" + DateUtils.getDate();
String uvKey = "log:" + logAnalyzeJob.getJobName() + ":uv:" + DateUtils.getDate();
// NOTE(review): this ShardedJedis resource is never returned to the
// pool / closed — connection leak; confirm and fix with try/finally.
ShardedJedis jedis = MyShardedJedisPool.getShardedJedisPool().getResource();
// Increment pv.
jedis.incr(pvKey);
// uv must be de-duplicated per user, hence a Redis set.
jedis.sadd(uvKey, logMessage.getUserName());
}
}
}
/**
 * Counts pv/uv for VIEW-type logs only. Same matching and Redis logic as
 * process(); apparently an earlier single-type variant kept for reference.
 */
private static void processViewLog(LogMessage logMessage) {
if (jobMap == null || jobDetail == null) {
loadDataModel();
}
List<LogAnalyzeJob> analyzeJobList = jobMap.get(LogTypeConstant.VIEW + "");
for (LogAnalyzeJob logAnalyzeJob : analyzeJobList) {
boolean isMatch = false;
List<LogAnalyzeJobDetail> logAnalyzeJobDetailList = jobDetail.get(logAnalyzeJob.getJobId());
for (LogAnalyzeJobDetail jobDetail : logAnalyzeJobDetailList) {
String fieldValueInLog = logMessage.getCompareFieldValue(jobDetail.getField());
// compare semantics: 1 = contains, 2 = equals
if (jobDetail.getCompare() == 1 && fieldValueInLog.contains(jobDetail.getValue())) {
isMatch = true;
} else if (jobDetail.getCompare() == 2 && fieldValueInLog.equals(jobDetail.getValue())) {
isMatch = true;
} else {
isMatch = false;
}
if (!isMatch) {
break;
}
}
if (isMatch) {
// pv/uv keys are scoped per job and per day.
String pvKey = "log:" + logAnalyzeJob.getJobName() + ":pv:" + DateUtils.getDate();
String uvKey = "log:" + logAnalyzeJob.getJobName() + ":uv:" + DateUtils.getDate();
// NOTE(review): same resource leak as in process() — jedis never returned.
ShardedJedis jedis = MyShardedJedisPool.getShardedJedisPool().getResource();
jedis.incr(pvKey);
// uv set for de-duplication.
jedis.sadd(uvKey, logMessage.getUserName());
// Possible optimization: de-duplicate uv with the BloomFilter instead.
}
}
}
/** Lazily loads whichever map is missing; synchronized to avoid double loads. */
private synchronized static void loadDataModel() {
if (jobMap == null) {
jobMap = loadJobMap();
}
if (jobDetail == null) {
jobDetail = loadJobDetailMap();
}
}
/** Loads all job conditions from the database, grouped by jobId (as String). */
private static Map<String, List<LogAnalyzeJobDetail>> loadJobDetailMap() {
Map<String, List<LogAnalyzeJobDetail>> map = new HashMap<String, List<LogAnalyzeJobDetail>>();
List<LogAnalyzeJobDetail> logAnalyzeJobDetailList = new LogAnalyzeDao().loadJobDetailList();
for (LogAnalyzeJobDetail logAnalyzeJobDetail : logAnalyzeJobDetailList) {
int jobId = logAnalyzeJobDetail.getJobId();
List<LogAnalyzeJobDetail> jobDetails = map.get(jobId + "");
if (jobDetails == null || jobDetails.size() == 0) {
jobDetails = new ArrayList<>();
map.put(jobId + "", jobDetails);
}
jobDetails.add(logAnalyzeJobDetail);
}
System.out.println("jobDetailMap: "+map);
return map;
}
/** Loads all enabled jobs from the database, grouped by jobType (as String). */
private static Map<String, List<LogAnalyzeJob>> loadJobMap() {
Map<String, List<LogAnalyzeJob>> map = new HashMap<String, List<LogAnalyzeJob>>();
List<LogAnalyzeJob> logAnalyzeJobList = new LogAnalyzeDao().loadJobList();
System.out.println(logAnalyzeJobList);
for (LogAnalyzeJob logAnalyzeJob : logAnalyzeJobList) {
int jobType = logAnalyzeJob.getJobType();
if (isValidType(jobType)) {
List<LogAnalyzeJob> jobList = map.get(jobType+"");
if (jobList == null || jobList.size() == 0) {
jobList = new ArrayList<>();
map.put(jobType + "", jobList);
}
jobList.add(logAnalyzeJob);
}
}
System.out.println("job: " + map);
return map;
}
/** Returns true when jobType is one of the four supported log types. */
public static boolean isValidType(int jobType) {
if (jobType == LogTypeConstant.BUY || jobType == LogTypeConstant.CLICK
|| jobType == LogTypeConstant.VIEW || jobType == LogTypeConstant.SEARCH) {
return true;
}
return false;
}
/**
 * Reloads the underlying data model (job and condition maps).
 * Used together with scheduleLoad(): only the first caller inside a
 * reload window (reloaded == true) performs the reload, then clears the flag.
 */
public static synchronized void reloadDataModel() {
if (reloaded) {
jobMap = loadJobMap();
jobDetail = loadJobDetailMap();
reloaded = false;
}
}
/**
 * Periodic configuration refresh; intended to be called on every tuple.
 * How it works:
 * 1. When the current minute is a multiple of 10 (the "reload window"),
 *    reloadDataModel() is invoked.
 * 2. reloadDataModel() is synchronized, so within a worker only one thread
 *    performs the reload at a time.
 * 3. The reloaded flag ensures the reload happens once per window: it is
 *    set true outside the window and cleared by the first thread that
 *    reloads inside the window.
 */
public static void scheduleLoad() {
String date = DateUtils.getDateTime();
int now = Integer.parseInt(date.split(":")[1]);
if (now % 10 == 0) {// reload once every 10 minutes
reloadDataModel();
} else {
reloaded = true;
}
}
}
/**
 * Singleton holder for a sharded Jedis connection pool (currently a single
 * shard at 127.0.0.1:6379), configured once in a static initializer.
 */
public class MyShardedJedisPool {
private static ShardedJedisPool shardedJedisPool;
// Pool configuration happens once at class-load time.
static {
//change "maxActive" -> "maxTotal" and "maxWait" -> "maxWaitMillis" in all examples
JedisPoolConfig config = new JedisPoolConfig();
// Maximum number of idle connections kept in the pool.
config.setMaxIdle(5);
// Maximum number of connections the pool may hand out via getResource();
// -1 means unlimited.
config.setMaxTotal(-1);
// Maximum time (ms) to wait when borrowing a connection before a
// JedisConnectionException is thrown.
// NOTE(review): 5 ms is an extremely aggressive timeout — confirm intended.
config.setMaxWaitMillis(5);
// Validate connections on borrow/return so callers receive live connections.
config.setTestOnBorrow(true);
config.setTestOnReturn(true);
// Shard list: a single local Redis instance for now.
List<JedisShardInfo> list = new LinkedList<JedisShardInfo>();
list.add(new JedisShardInfo("127.0.0.1", 6379));
// Build the sharding-capable connection pool.
shardedJedisPool = new ShardedJedisPool(config, list);
}
public static ShardedJedisPool getShardedJedisPool() {
return shardedJedisPool;
}
// Ad-hoc smoke test: writes a few keys through the sharded pool.
public static void main(String[] args) {
ShardedJedis jedis = MyShardedJedisPool.getShardedJedisPool().getResource();
jedis.set("1","maoxiangyi");
jedis.set("2","itcast");
jedis.set("3","传智播客");
jedis.set("4","java学院");
}
}
8、Storm驱动类
/**
 * Builds and submits the log-analysis topology:
 * kafka-spout -> MessageFilterBolt (shuffle) -> ProcessMessage (fields by "type").
 * With CLI arguments the topology is submitted to a remote cluster
 * (args[0] = topology name); without arguments it runs in a LocalCluster.
 */
public class LogAnalyzeTopologyMain {
    public static void main(String[] args) throws Exception {
        // Wire up the spout and the two bolts.
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        topologyBuilder.setSpout("kafka-spout", new RandomSpout(), 2);
        topologyBuilder.setBolt("MessageFilter-bolt", new MessageFilterBolt(), 3)
                .shuffleGrouping("kafka-spout");
        topologyBuilder.setBolt("ProcessMessage-bolt", new ProcessMessage(), 2)
                .fieldsGrouping("MessageFilter-bolt", new Fields("type"));

        Config conf = new Config();
        boolean runOnCluster = args != null && args.length > 0;
        if (runOnCluster) {
            // Remote submission: args[0] is the topology name.
            conf.setNumWorkers(2);
            StormSubmitter.submitTopologyWithProgressBar(args[0], conf, topologyBuilder.createTopology());
        } else {
            // Development mode: run inside an in-process LocalCluster.
            conf.setMaxTaskParallelism(3);
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("LogAnalyzeTopologyMain", conf, topologyBuilder.createTopology());
            Utils.sleep(10000000);
            localCluster.shutdown();
        }
    }
}