数据
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436920","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"NEWLIVEVIEW_QUIT_TAB","value":"0","du":""}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436923","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"EVENT_ZIP_UPLOAD","value":"1","du":""},"properties":{"property1":"1"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844841","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"AMAP_LOCATION_UPDATE","value":"0","du":"446"},"properties":{"property1":"0","property3":"1"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844865","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_START","value":"http:\/\/weather.api.moji.com\/data\/detail","du":""},"properties":{"property1":"1","property2":"0","property3":"{\"common\":{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"},\"params\":{\"city\":[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]}}","property4":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845076","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_UPDATE","value":"http:\/\/weather.api.moji.com\/data\/detail","du":"243"},"properties":{"property1":0,"property4":"1","property5":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424","property6":"weather.api.moji.com\/111.13.70.18:80"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845226","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"WEATHER_UPDATE","value":"1","du":"327"},"properties":{"property1":1,"property2":-1,"property3":"http:\/\/weather.api.moji.com\/data\/detail","property4":"2","property5":"RequestParams:[city=[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]], commonParams:{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"}","property6":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845304","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"SHOWER_CONDITION_CONSIS_MONITOR","value":"1","du":""},"properties":{"property1":0,"property2":0,"property3":"31.28037,104.452387"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845312","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"LOCATION_UPDATE","value":"0","du":"1096"},"properties":{"property1":"0","property3":"1"}}
题目要求
将原始数据中用户的 "uid"、"platform"、"app_version"、"pid"、"cityid" 五个字段及其对应的值提取出来。
代码
package com.mr2;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class preTwo {
public static class MyMapper extends Mapper<LongWritable,Text,Text,NullWritable>
{
/*
* 数据
* {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android",
* "app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503",
* "iccid":"89860077221897301901","snsid":"","ts":"1557282063721","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"SHOWER_CONDITION_CONSIS_MONITOR","value":"1","du":""},"properties":{"property1":0,"property2":0,"property3":"31.280233,104.452469"}}
*/
protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException
{
StringBuffer k = new StringBuffer();
String s = String.valueOf(value);
//将原始数据进行切分
String[] split = s.split(",");
for(int i=0;i<split.length;i++)
{
//利用条件"uid\":"等可以过滤数据commonParams:{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":
if(split[i].contains("uid\":")||split[i].contains("platform\":")||split[i].contains("app_version\":")||split[i].contains("pid\":")||split[i].contains("cityid\":"))
{
//利用这个if()过滤掉数据{"common":{"uid":"417705234" "platform":"Android" "app_version":"1001010000" "pid":"4025" "cityid":""
if(split[i].contains("uid\":"))
{
int m = split[i].indexOf("uid");
//从"uid"开始截取split[i]而不是从{"common":
k.append(split[i].substring(m-1)+" ");
}
else
{
k.append(split[i]+" ");
}
}
}
String k1 = k.substring(0,k.length()-1);
context.write(new Text(k1), NullWritable.get());
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
{
// TODO Auto-generated method stub
Configuration conf = new Configuration();
Job job = Job.getInstance(conf,preTwo.class.getSimpleName());
job.setJarByClass(preTwo.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(Reducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.addInputPath(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.waitForCompletion(true);
}
}