2019年安徽省大数据网络赛数据预处理(二)

数据

{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436920","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"NEWLIVEVIEW_QUIT_TAB","value":"0","du":""}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436923","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"EVENT_ZIP_UPLOAD","value":"1","du":""},"properties":{"property1":"1"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844841","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"AMAP_LOCATION_UPDATE","value":"0","du":"446"},"properties":{"property1":"0","property3":"1"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844865","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_START","value":"http:\/\/weather.api.moji.com\/data\/detail","du":""},"properties":{"property1":"1","property2":"0","property3":"{\"common\":{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"},\"params\":{\"city\":[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]}}","property4":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845076","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_UPDATE","value":"http:\/\/weather.api.moji.com\/data\/detail","du":"243"},"properties":{"property1":0,"property4":"1","property5":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424","property6":"weather.api.moji.com\/111.13.70.18:80"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845226","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"WEATHER_UPDATE","value":"1","du":"327"},"properties":{"property1":1,"property2":-1,"property3":"http:\/\/weather.api.moji.com\/data\/detail","property4":"2","property5":"RequestParams:[city=[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]], commonParams:{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"}","property6":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845304","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"SHOWER_CONDITION_CONSIS_MONITOR","value":"1","du":""},"properties":{"property1":0,"property2":0,"property3":"31.28037,104.452387"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845312","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"LOCATION_UPDATE","value":"0","du":"1096"},"properties":{"property1":"0","property3":"1"}}

题目要求

将原始数据中用户的 "uid"、"platform"、"app_version"、"pid"、"cityid" 五个字段及其对应的值提取出来。

代码

package com.mr2;
import java.io.IOException;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class preTwo {
    public static class MyMapper extends Mapper<LongWritable,Text,Text,NullWritable>
    {
    	/*
    	 * 数据
    	 * {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android",
    	 * "app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503",
    	 * "iccid":"89860077221897301901","snsid":"","ts":"1557282063721","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"SHOWER_CONDITION_CONSIS_MONITOR","value":"1","du":""},"properties":{"property1":0,"property2":0,"property3":"31.280233,104.452469"}}
    	 */
    	protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException
    	{
    		StringBuffer k = new StringBuffer();
    		String s = String.valueOf(value);
    		//将原始数据进行切分
    		String[] split = s.split(",");
    		for(int i=0;i<split.length;i++)
    		{
    			//利用条件"uid\":"等可以过滤数据commonParams:{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":
    			if(split[i].contains("uid\":")||split[i].contains("platform\":")||split[i].contains("app_version\":")||split[i].contains("pid\":")||split[i].contains("cityid\":"))
    			{
    				//利用这个if()过滤掉数据{"common":{"uid":"417705234"   "platform":"Android"   "app_version":"1001010000"   "pid":"4025"   "cityid":"" 
    				if(split[i].contains("uid\":"))
    				{
    					int m = split[i].indexOf("uid");
    					//从"uid"开始截取split[i]而不是从{"common":
    					k.append(split[i].substring(m-1)+"  ");
    				}
    				else
    				{
    					k.append(split[i]+"   ");
    				}
         			
    			}
    	}
    		String k1 = k.substring(0,k.length()-1);
    		context.write(new Text(k1), NullWritable.get());
    }
  }
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
	{
		// TODO Auto-generated method stub
		Configuration conf = new Configuration();
 	   Job job = Job.getInstance(conf,preTwo.class.getSimpleName());
 	   job.setJarByClass(preTwo.class);
 	   job.setMapperClass(MyMapper.class);
 	   job.setReducerClass(Reducer.class);
 	   job.setMapOutputKeyClass(Text.class);
 	   job.setMapOutputValueClass(NullWritable.class);
 	   job.setOutputKeyClass(Text.class);
 	   job.setOutputValueClass(NullWritable.class);
 	   FileInputFormat.addInputPath(job,new Path(args[0]));
 	   FileOutputFormat.setOutputPath(job,new Path(args[1]));
 	   job.waitForCompletion(true);	   
	}
    }


结果

在这里插入图片描述

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值