目录
本篇文章记录各区域热门商品统计-异构数据源之从MySQL中查询城市数据。
代码
AreaTop3ProductSpark.java
/**查询指定日期范围内的点击行为数据 * * @param sqlContext * @param startDate 起始日期 * @param endDate 结束日期 * @return */ private static JavaPairRDD<Long,Row> getCityid2ClickActionRDDByDate(SQLContext sqlContext, String startDate, String endDate) { // 从user_visit_action中,查询用户访问行为数据 // 第一个限定:click_product_id,限定为不为空的访问行为,那么就代表着点击行为 // 第二个限定:在用户指定的日期范围内的数据 String sql = "SELECT " + "city_id," + "click_product_id product_id " + "FROM user_visit_action " + "WHERE click_product_id IS NOT NULL " + "AND click_product_id != 'NULL' " + "AND click_product_id != 'null' " + "AND action_time >='" + startDate + "' " + "AND action_time <= '"+ endDate + "'" ; Dataset clickActionDs = sqlContext.sql(sql); JavaRDD<Row> clickActionRDD = clickActionDs.javaRDD(); JavaPairRDD<Long,Row> cityid2clickActionRDD = clickActionRDD.mapToPair( new PairFunction<Row, Long, Row>() { private static final long serialVersionUID = 1L; @Override public Tuple2<Long, Row> call(Row row) throws Exception { long cityid = row.getLong(0); return new Tuple2<Long,Row>(cityid,row); } }); return cityid2clickActionRDD; } private static JavaPairRDD<Long,Row> getCityid2CityInfoRDD(SQLContext sqlContext){ //构建Mysql连接配置信息(从配置文件中获取) String url = null; boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL); if (local){ url = ConfigurationManager.getProperty(Constants.JDBC_URL); }else { url = ConfigurationManager.getProperty(Constants.JDBC_URL_PROD); } Map<String,String> options = new HashMap<String,String>(); options.put("url",url); options.put("dbtable","city_info"); //通过SQLContext去从Mysql中查询数据 Dataset cityInfoDs = sqlContext.read().format("jdbc") .options(options).load(); //返回RDD JavaRDD<Row> cityInfoRDD = cityInfoDs.javaRDD(); JavaPairRDD<Long,Row> cityid2cityInfoRDD = cityInfoRDD.mapToPair(new PairFunction<Row, Long, Row>() { private static final long serialVersionUID = 1L; @Override public Tuple2<Long, Row> call(Row row) throws Exception { long cityid = row.getLong(0); return new Tuple2<Long,Row>(cityid,row); } }); return cityid2cityInfoRDD; }