Hive 茄子快传案例

多维度统计分析茄子快传用户信息

数据截图:

1.MR编程,将JSON数据转换为CSV格式,剔除脏数据,并将清洗结果上传到Linux服务器

package nuc.edu.ls;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

/**
 * Map-only cleaning job: parses raw JSON app-log lines, drops dirty records
 * (malformed JSON or missing required header fields), and emits each valid
 * record as one comma-separated line matching the Hive table ods_app_log.
 */
public class AppLogClean {
	public static class MapTask extends Mapper<LongWritable, Text, Text, NullWritable> {

		/** Output column order; must match the Hive table ods_app_log (uid appended last). */
		private static final String[] FIELDS = {
				"cid_sn", "mobile_data_type", "os_ver", "mac", "resolution",
				"commit_time", "sdk_ver", "device_id_type", "city", "android_id",
				"device_model", "carrier", "promotion_channel", "app_ver_name",
				"imei", "app_ver_code", "pid", "net_type", "device_id",
				"app_device_id", "release_channel", "country", "time_zone",
				"os_name", "manufacture", "commit_id", "app_token", "account",
				"app_id", "build_num", "language" };

		// Reused across map() calls; a Mapper instance is single-threaded, so this is safe.
		StringBuilder sb = new StringBuilder();
		Text k = new Text();

		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)
				throws IOException, InterruptedException {
			String line = value.toString();

			JSONObject root;
			try {
				root = JSON.parseObject(line);
			} catch (Exception e) {
				// BUG FIX: parseObject throws on malformed JSON rather than
				// returning null; dirty lines must be skipped, not crash the job.
				return;
			}
			if (root == null) {
				return;
			}
			JSONObject header = root.getJSONObject("header");
			if (header == null) {
				return;
			}

			// Filter out records missing any of the required key fields.
			if (StringUtils.isBlank(header.getString("release_channel"))
					|| StringUtils.isBlank(header.getString("device_id"))
					|| StringUtils.isBlank(header.getString("city"))
					|| StringUtils.isBlank(header.getString("device_id_type"))
					|| StringUtils.isBlank(header.getString("app_ver_name"))
					|| StringUtils.isBlank(header.getString("os_name"))
					|| StringUtils.isBlank(header.getString("mac"))) {
				return;
			}

			// BUG FIX: the original compared app_ver_name (a version string such
			// as "1.0.3") with "android", which can never match. The intent is:
			// on the android OS, android_id is also a required field.
			if ("android".equals(header.getString("os_name"))
					&& StringUtils.isBlank(header.getString("android_id"))) {
				return;
			}

			for (String field : FIELDS) {
				// Missing optional fields render as the literal "null",
				// preserving the original concatenation behavior.
				sb.append(header.getString(field)).append(",");
			}
			// uid: the mac address serves as the unique user id (last column, no trailing comma).
			sb.append(header.getString("mac"));

			k.set(sb.toString());
			context.write(k, NullWritable.get());
			sb.setLength(0); // clear the buffer for the next record
		}
	}

	/** Driver: configures and runs the map-only cleaning job. */
	public static void main(String[] args) throws Exception {
		// BUG FIX: the property name was misspelled "HADDOP_USER_NAME",
		// so the job would not actually run as the intended HDFS user.
		System.setProperty("HADOOP_USER_NAME", "root");
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);

		job.setJarByClass(AppLogClean.class);
		job.setMapperClass(MapTask.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);

		// Recurse into date-named subdirectories under the input root.
		FileInputFormat.setInputDirRecursive(job, true);
		FileInputFormat.addInputPath(job, new Path("d:/AppData/appuserdata/input/"));
		FileOutputFormat.setOutputPath(job, new Path("d:/AppData/appuserdata/out"));

		// Map-only job: no reduce phase is needed, so zero reducers.
		job.setNumReduceTasks(0);

		boolean ret = job.waitForCompletion(true);
		System.exit(ret ? 0 : 1);
	}
}

2.使用Hive进行各种数据统计

每天的活跃用户
/** 创建表 用来加载清洗好的数据  **/
-- ODS table holding the cleaned, comma-delimited records produced by the
-- AppLogClean MapReduce job; one partition per day (yyyymmdd).
CREATE TABLE ods_app_log (
    cid_sn            STRING,
    mobile_data_type  STRING,
    os_ver            STRING,
    mac               STRING,
    resolution        STRING,
    commit_time       STRING,
    sdk_ver           STRING,
    device_id_type    STRING,
    city              STRING,
    android_id        STRING,
    device_model      STRING,
    carrier           STRING,
    promotion_channel STRING,
    app_ver_name      STRING,
    imei              STRING,
    app_ver_code      STRING,
    pid               STRING,
    net_type          STRING,
    device_id         STRING,
    app_device_id     STRING,
    release_channel   STRING,
    country           STRING,
    time_zone         STRING,
    os_name           STRING,
    manufacture       STRING,
    commit_id         STRING,
    app_token         STRING,
    account           STRING,
    app_id            STRING,
    build_num         STRING,
    language          STRING,
    uid               STRING
)
PARTITIONED BY (day STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ",";

-- Load one day's cleaned file from the local filesystem into the
-- matching daily partition of the ODS table.
LOAD DATA LOCAL INPATH "/root/data/20170101" INTO TABLE ods_app_log PARTITION (day="20170101");

-- ETL table: one row per active user per day, carrying the dimension
-- columns needed by the downstream cube computation.
CREATE TABLE etl_user_active_day (
    uid             STRING,
    commit_time     STRING,
    city            STRING,
    release_channel STRING,
    app_ver_name    STRING
)
PARTITIONED BY (day STRING);

-- Daily-active extraction for 2017-01-01: keep exactly one row per uid —
-- the user's earliest record of the day (ROW_NUMBER over commit_time).
INSERT INTO TABLE etl_user_active_day PARTITION (day="20170101")
SELECT
    uid,
    commit_time,
    city,
    release_channel,
    app_ver_name
FROM (
    SELECT
        uid,
        commit_time,
        city,
        release_channel,
        app_ver_name,
        ROW_NUMBER() OVER (PARTITION BY uid ORDER BY commit_time) AS rn
    FROM ods_app_log
    WHERE day = "20170101"
) ranked
WHERE ranked.rn = 1;

-- Same daily-active extraction for 2017-01-02.
INSERT INTO TABLE etl_user_active_day PARTITION (day="20170102")
SELECT
    uid,
    commit_time,
    city,
    release_channel,
    app_ver_name
FROM (
    SELECT
        uid,
        commit_time,
        city,
        release_channel,
        app_ver_name,
        ROW_NUMBER() OVER (PARTITION BY uid ORDER BY commit_time) AS rn
    FROM ods_app_log
    WHERE day = "20170102"
) ranked
WHERE ranked.rn = 1;

/**    分维度进行统计   **/
时间	城市	渠道	版本	活跃用户
当天	0		0		0 
当天	0		0		1
当天	0		1		0
当天	0		1		1
当天	1		0		0
当天	1		0		1
当天	1		1		0
当天	1		1		1

-- Dimension cube table: active-user counts per (city, release_channel,
-- app_ver_name) combination. flag encodes which of the three dimensions
-- are concrete ("1") versus rolled up to "all" ("0"), e.g. flag="101"
-- means city and app_ver_name are concrete, release_channel is "all".
CREATE TABLE dim_user_active (
    city            STRING,
    release_channel STRING,
    app_ver_name    STRING,
    active_user_cnt INT
)
PARTITIONED BY (day STRING, flag STRING);

-- Hive multi-insert: scan etl_user_active_day once and populate all eight
-- dimension combinations (the full cube over city/release_channel/app_ver_name)
-- in a single job. Each branch filters to the same day partition.
FROM etl_user_active_day

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="000")
SELECT "all", "all", "all", COUNT(1)
WHERE day = "20170101"

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="001")
SELECT "all", "all", app_ver_name, COUNT(1)
WHERE day = "20170101"
GROUP BY app_ver_name

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="010")
SELECT "all", release_channel, "all", COUNT(1)
WHERE day = "20170101"
GROUP BY release_channel

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="011")
SELECT "all", release_channel, app_ver_name, COUNT(1)
WHERE day = "20170101"
GROUP BY release_channel, app_ver_name

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="100")
SELECT city, "all", "all", COUNT(1)
WHERE day = "20170101"
GROUP BY city

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="101")
SELECT city, "all", app_ver_name, COUNT(1)
WHERE day = "20170101"
GROUP BY city, app_ver_name

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="110")
SELECT city, release_channel, "all", COUNT(1)
WHERE day = "20170101"
GROUP BY city, release_channel

INSERT INTO TABLE dim_user_active PARTITION (day="20170101", flag="111")
SELECT city, release_channel, app_ver_name, COUNT(1)
WHERE day = "20170101"
GROUP BY city, release_channel, app_ver_name;


 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值