目录
hive中建表
在hive中创建与业务数据一样的表
CREATE TABLE cartinfo(userid string, productid string,num string,productamount string,createtime string,mechartid string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
...
sqoop
安装sqoop来同步业务数据到hive
下载sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz
解压之后,将mysql驱动包放在sqoop的lib目录下
执行如下命令同步数据
sqoop import --connect jdbc:mysql://master:3306/ds --username root --password 123456 --table cartinfo --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cartinfo/1 --hive-table cartinfo --m 1 --hive-import
...
可得如下结果:
同步数据完成。
flink batch实现产品成交分析
详细代码可参考文末github地址
map:
public class ProductanalyMap implements FlatMapFunction<String, ProductAnaly> {

    /**
     * Parses one JSON order record and emits a per-(month, product) tally.
     * chengjiaocount (completed orders) is 1 when the order has a pay time;
     * weichegnjiao (uncompleted orders) is 1 when it does not.
     *
     * @param value one order record serialized as JSON
     * @param out   collector receiving exactly one ProductAnaly per input record
     */
    @Override
    public void flatMap(String value, Collector<ProductAnaly> out) throws Exception {
        OrderInfo orderInfo = JSONObject.parseObject(value, OrderInfo.class);
        long productid = orderInfo.getProductid();
        Date date = orderInfo.getCreatetime();
        // Bucket by calendar month (yyyyMM) of the order creation time.
        String timestring = DateUtil.getDateby(date.getTime(), "yyyyMM");
        Date paytime = orderInfo.getPaytime();

        long chengjiaocount = 0L; // completed (paid) order count
        long weichegnjiao = 0L;   // uncompleted (unpaid) order count
        if (paytime != null) {
            chengjiaocount = 1L;
        } else {
            // BUG FIX: was `weichegnjiao = 0l`, which left the unpaid counter
            // at zero forever — unpaid orders were never counted downstream.
            weichegnjiao = 1L;
        }

        ProductAnaly productAnaly = new ProductAnaly();
        productAnaly.setProductid(productid);
        productAnaly.setDateString(timestring);
        productAnaly.setChengjiaocount(chengjiaocount);
        productAnaly.setWeichegnjiao(weichegnjiao);
        // Composite grouping key so reduce aggregates per month + product id.
        productAnaly.setGroupbyfield(timestring + productid);
        out.collect(productAnaly);
    }
}
reduce:
public class ProductanalyReduce implements ReduceFunction<ProductAnaly> {

    /**
     * Sums the completed/uncompleted counters of two records that share the
     * same group key (month + product id), preserving the key fields.
     *
     * @param value1 first partial aggregate
     * @param value2 second partial aggregate (same group key as value1)
     * @return a new ProductAnaly carrying the combined counters
     */
    @Override
    public ProductAnaly reduce(ProductAnaly value1, ProductAnaly value2) throws Exception {
        ProductAnaly productAnaly = new ProductAnaly();
        // Key fields are identical within a group; copy them from value1.
        productAnaly.setDateString(value1.getDateString());
        productAnaly.setProductid(value1.getProductid());
        // BUG FIX: the second operands were read from value1 instead of value2,
        // which doubled value1's counts and silently dropped value2's
        // contribution — every group collapsed to 2x its first element.
        productAnaly.setChengjiaocount(value1.getChengjiaocount() + value2.getChengjiaocount());
        productAnaly.setWeichegnjiao(value1.getWeichegnjiao() + value2.getWeichegnjiao());
        return productAnaly;
    }
}
flink执行:
// Read the raw order lines, tally per (month, product), then aggregate
// per composite key and persist the results to HBase.
DataSet<String> text = env.readTextFile(params.get("input"));
DataSet<ProductAnaly> map = text.flatMap(new ProductanalyMap());
DataSet<ProductAnaly> reduce = map.groupBy("groupbyfield").reduce(new ProductanalyReduce());
try {
    // NOTE: in the DataSet API, collect() itself triggers job execution.
    List<ProductAnaly> list = reduce.collect();
    for (ProductAnaly value : list) {
        long productid = value.getProductid();
        String datatime = value.getDateString();
        Map<String, String> datamap = new HashMap<String, String>();
        datamap.put("chengjiaocount", value.getChengjiaocount() + "");
        datamap.put("weichengjiaocount", value.getWeichegnjiao() + "");
        // Row key: "<productid>==<yyyyMM>", column family "info".
        HbaseUtil.put("pindaoinfo", productid + "==" + datatime, "info", datamap);
    }
    // BUG FIX: removed the trailing env.execute("pindaossfx"). collect()
    // above already ran the job; calling execute() again with no remaining
    // sinks throws "No new data sinks have been defined" at runtime.
} catch (Exception e) {
    e.printStackTrace();
}
总结
创建hive业务表，通过sqoop同步电商业务数据，再用flink batch实现产品成交分析。后续将搭建接口服务，从hive中读取分析结果数据；并搭建前端服务调用该接口进行展示。
具体代码可参照我的git项目地址,现有代码均已通过测试可以使用,后续会持续更新,直到项目结束,不懂的细节,可以关注公众号:阿清的日常,后台留言,会细致解答。