目录
本篇文章记录广告点击流量实时统计中的一个功能:计算每天各广告在最近1小时滑动窗口内的点击趋势。
代码
domain
AdClickTrend.java
package graduation.java.domain; /** * FileName: AdClickTrend * Author: hadoop * Email: 3165845957@qq.com * Date: 19-4-4 下午8:33 * Description: * 一小时内广告点击趋势实体类 */ public class AdClickTrend { private String date; private String hour; private String minute; private long adid; private long clickCount; public String getDate() { return date; } public void setDate(String date) { this.date = date; } public String getHour() { return hour; } public void setHour(String hour) { this.hour = hour; } public String getMinute() { return minute; } public void setMinute(String minute) { this.minute = minute; } public long getAdid() { return adid; } public void setAdid(long adid) { this.adid = adid; } public long getClickCount() { return clickCount; } public void setClickCount(long clickCount) { this.clickCount = clickCount; } @Override public String toString() { return "AdClickTrend{" + "date='" + date + '\'' + ", hour='" + hour + '\'' + ", minute='" + minute + '\'' + ", adid=" + adid + ", clickCount=" + clickCount + '}'; } }
dao
IAdClickTrendDAO.java
package graduation.java.dao;

import graduation.java.domain.AdClickTrend;

import java.util.List;

/**
 * DAO interface for the one-hour ad click trend data.
 *
 * <p>FileName: IAdClickTrendDAO<br>
 * Author: hadoop<br>
 * Email: 3165845957@qq.com<br>
 * Date: 19-4-4 20:36
 */
public interface IAdClickTrendDAO {

    /**
     * Batch-updates or inserts one-hour ad click trend records.
     *
     * @param adClickTrendList the click trend records to persist
     */
    void updateBatch(List<AdClickTrend> adClickTrendList);
}
impl
AdClickTrendDAOImpl.java
package graduation.java.domain;

// NOTE(review): JDBC4CallableStatement is not referenced anywhere in this class.
import com.mysql.jdbc.JDBC4CallableStatement;
import graduation.java.dao.IAdClickTrendDAO;
import graduation.java.jdbc.JDBCHelper;
import graduation.java.model.AdClickTrendQueryResult;

import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;

/**
 * JDBC implementation of {@link IAdClickTrendDAO} for the one-hour ad click
 * trend, stored in the {@code ad_click_trend} table.
 *
 * <p>FileName: AdClickTrendDAOImpl<br>
 * Author: hadoop<br>
 * Email: 3165845957@qq.com<br>
 * Date: 19-4-4 20:39
 */
public class AdClickTrendDAOImpl implements IAdClickTrendDAO {

    /** Counts existing rows for one (date, hour, minute, ad_id) key. */
    private static final String SELECT_SQL = "SELECT "
            + "count(*) "
            + "FROM ad_click_trend "
            + "WHERE date=? "
            + "AND hour=? "
            + "AND minute=? "
            + "AND ad_id=?";

    /** Overwrites the click count of an existing (date, hour, minute, ad_id) row. */
    private static final String UPDATE_SQL = "UPDATE ad_click_trend SET click_count=? "
            + "WHERE date=? "
            + "AND hour=? "
            + "AND minute=? "
            + "AND ad_id=?";

    /**
     * Inserts a new row. This must target the same table the SELECT and UPDATE
     * statements use; otherwise inserted rows are never found by the existence
     * check and get inserted again on every batch.
     */
    private static final String INSERT_SQL = "INSERT INTO ad_click_trend VALUES(?,?,?,?,?)";

    /**
     * Persists a batch of click trend records: records whose
     * (date, hour, minute, ad_id) key already exists are updated, all others
     * are inserted.
     *
     * <p>Existence is checked with one query per record before any write is
     * issued, so a key that occurs twice in the same list, or that another
     * writer inserts concurrently, can still be inserted twice. The original
     * author's advice for that case: records with the same key normally end up
     * in the same partition, so duplicates are not expected; if they can occur
     * for your workload, add a {@code create_time} column and let the
     * consuming application read only the newest row.
     *
     * @param adClickTrendList records to persist; {@code null} or an empty
     *                         list is a no-op
     */
    @Override
    public void updateBatch(List<AdClickTrend> adClickTrendList) {
        // Nothing to persist: skip the database round trips entirely.
        if (adClickTrendList == null || adClickTrendList.isEmpty()) {
            return;
        }

        JDBCHelper jdbcHelper = JDBCHelper.getInstance();

        // Split the records into "row exists -> update" and "row missing -> insert".
        List<Object[]> updateParamsList = new ArrayList<Object[]>();
        List<Object[]> insertParamsList = new ArrayList<Object[]>();
        for (AdClickTrend adClickTrend : adClickTrendList) {
            if (rowExists(jdbcHelper, adClickTrend)) {
                // Parameter order follows UPDATE_SQL: new value first, then the key.
                updateParamsList.add(new Object[]{
                        adClickTrend.getClickCount(),
                        adClickTrend.getDate(),
                        adClickTrend.getHour(),
                        adClickTrend.getMinute(),
                        adClickTrend.getAdid()
                });
            } else {
                // Positional INSERT: presumably the table's column order is
                // date, hour, minute, ad_id, click_count - verify against the schema.
                insertParamsList.add(new Object[]{
                        adClickTrend.getDate(),
                        adClickTrend.getHour(),
                        adClickTrend.getMinute(),
                        adClickTrend.getAdid(),
                        adClickTrend.getClickCount()
                });
            }
        }

        // Run the batched update.
        jdbcHelper.executeBatch(UPDATE_SQL, updateParamsList);

        // Run the batched insert.
        jdbcHelper.executeBatch(INSERT_SQL, insertParamsList);
    }

    /**
     * Tells whether a row for the record's (date, hour, minute, ad_id) key is
     * already stored.
     *
     * @param jdbcHelper   helper used to run the query
     * @param adClickTrend record whose key is looked up
     * @return {@code true} if the count query reports at least one row
     */
    private static boolean rowExists(JDBCHelper jdbcHelper, AdClickTrend adClickTrend) {
        final AdClickTrendQueryResult queryResult = new AdClickTrendQueryResult();

        Object[] params = new Object[]{
                adClickTrend.getDate(),
                adClickTrend.getHour(),
                adClickTrend.getMinute(),
                adClickTrend.getAdid()
        };

        jdbcHelper.executeQuery(SELECT_SQL, params, new JDBCHelper.QueryCallback() {
            @Override
            public void process(ResultSet rs) throws Exception {
                while (rs.next()) {
                    queryResult.setCount(rs.getInt(1));
                }
            }
        });

        return queryResult.getCount() > 0;
    }
}
factory
DAOFactory.java
/**
 * Factory method for the DAO that manages the one-hour ad click trend.
 *
 * @return a newly created {@link AdClickTrendDAOImpl}
 */
public static IAdClickTrendDAO getAdClickTrendADO() {
    IAdClickTrendDAO adClickTrendDAO = new AdClickTrendDAOImpl();
    return adClickTrendDAO;
}
model
AdClickTrendQueryResult.java
package graduation.java.model; /** * FileName: AdClickQueryResult * Author: hadoop * Email: 3165845957@qq.com * Date: 19-4-4 下午8:42 * Description: * 1一小时广告点击查询结果保存类 */ public class AdClickTrendQueryResult { private int count; public int getCount() { return count; } public void setCount(int count) { this.count = count; } @Override public String toString() { return "AdClickTrendQueryResult{" + "count=" + count + '}'; } }
spark.ad
AdClickRealTimeStatSpark.java
/** * 计算最近1小时滑动窗口内的广告点击趋势 * @param adRealTimeLogDStream */ private static void calculateAdClickCountByWindow(JavaPairInputDStream<String, String> adRealTimeLogDStream) { // 映射成<yyyyMMddHHMM_adid,1L>格式 JavaPairDStream<String,Long> pairDStream = adRealTimeLogDStream.mapToPair(new PairFunction<Tuple2<String, String>, String, Long>() { private static final long serialVersionUID =1L; @Override public Tuple2<String, Long> call(Tuple2<String, String> tuple) throws Exception { // timestamp province city userid adid String[] logSplited = tuple._2.split(" "); String timeMinute = DateUtils.formatTimeMinute(new Date(Long.valueOf(logSplited[0]))); long adid = Long.valueOf(logSplited[3]); String key = timeMinute + "_" + adid; return new Tuple2<String,Long>(key,adid); } }); // 过来的每个batch rdd,都会被映射成<yyyyMMddHHMM_adid,1L>的格式 // 每次出来一个新的batch,都要获取最近1小时内的所有的batch // 然后根据key进行reduceByKey操作,统计出来最近一小时内的各分钟各广告的点击次数 // 1小时滑动窗口内的广告点击趋势 // 点图 / 折线图 JavaPairDStream<String,Long> aggrRDD = pairDStream.reduceByKeyAndWindow(new Function2<Long, Long, Long>() { private static final long serialVersionUID = 1L; @Override public Long call(Long v1, Long v2) throws Exception { return v1+v2; } },Durations.minutes(60),Durations.seconds(10)); // aggrRDD // 每次都可以拿到,最近1小时内,各分钟(yyyyMMddHHMM)各广告的点击量 // 各广告,在最近1小时内,各分钟的点击量 aggrRDD.foreachRDD(new VoidFunction<JavaPairRDD<String, Long>>() { private static final long serialVersionUID = 1L; @Override public void call(JavaPairRDD<String, Long> rdd) throws Exception { rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Long>>>() { private static final long serialVersionUID =1L; @Override public void call(Iterator<Tuple2<String, Long>> iterator) throws Exception { List<AdClickTrend> adClickTrends = new ArrayList<AdClickTrend>(); while (iterator.hasNext()){ Tuple2<String,Long> tuple = iterator.next(); String[] keySplited = tuple._1.split("_"); //yyyyMMddHHmm String dateKey = keySplited[0]; String date = dateKey.substring(0,8); String hour = 
dateKey.substring(8,10); String minute = dateKey.substring(10); long adid = Long.valueOf(keySplited[1]); long clickCount = tuple._2; AdClickTrend adClickTrend = new AdClickTrend(); adClickTrend.setDate(date); adClickTrend.setHour(hour); adClickTrend.setMinute(minute); adClickTrend.setAdid(adid); adClickTrend.setClickCount(clickCount); adClickTrends.add(adClickTrend); } IAdClickTrendDAO adClickTrendDAO = DAOFactory.getAdClickTrendADO(); adClickTrendDAO.updateBatch(adClickTrends); } }); } }); }