DEVICE_ID sum(BUSINESS_ID) CLIENT_ID+BUSINESS_ID
101 46 16
101 46 18
101 46 28
102 31 20
102 31 22
103 55 24
103 55 26
103 55 30
数据库中的数据 (data in the database):
timestamp row column value
2014-2-14 15:12 2014-02-01 13:2000000031013101 CLIENT_ID 3
2014-2-14 15:12 2014-02-01 13:2000000031013101 BUSINESS_ID 13
2014-2-14 15:12 2014-02-01 13:2000000031013101 DEVICE_ID 101
2014-2-14 15:15 2014-02-01 15:0000000041014101 CLIENT_ID 4
2014-2-14 15:15 2014-02-01 15:0000000041014101 BUSINESS_ID 14
2014-2-14 15:15 2014-02-01 15:0000000041014101 DEVICE_ID 101
2014-2-14 15:20 2014-02-01 16:0000000051015102 CLIENT_ID 5
2014-2-14 15:20 2014-02-01 16:0000000051015102 BUSINESS_ID 15
2014-2-14 15:20 2014-02-01 16:0000000051015102 DEVICE_ID 102
2014-2-14 15:27 2014-02-01 20:0000000061016102 CLIENT_ID 6
2014-2-14 15:27 2014-02-01 20:0000000061016102 BUSINESS_ID 16
2014-2-14 15:27 2014-02-01 20:0000000061016102 DEVICE_ID 102
2014-2-14 15:35 2014-02-02 10:0000000071017103 CLIENT_ID 7
2014-2-14 15:35 2014-02-02 10:0000000071017103 BUSINESS_ID 17
2014-2-14 15:35 2014-02-02 10:0000000071017103 DEVICE_ID 103
2014-2-14 15:39 2014-02-02 13:0000000081018103 CLIENT_ID 8
2014-2-14 15:39 2014-02-02 13:0000000081018103 BUSINESS_ID 18
2014-2-14 15:39 2014-02-02 13:0000000081018103 DEVICE_ID 103
2014-2-14 15:50 2014-02-02 19:0000000091019101 CLIENT_ID 9
2014-2-14 15:50 2014-02-02 19:0000000091019101 BUSINESS_ID 19
2014-2-14 15:50 2014-02-02 19:0000000091019101 DEVICE_ID 101
2014-2-14 16:10 2014-02-03 14:0000000101020103 CLIENT_ID 10
2014-2-14 16:10 2014-02-03 14:0000000101020103 BUSINESS_ID 20
2014-2-14 16:10 2014-02-03 14:0000000101020103 DEVICE_ID 103
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {

    /**
     * Re-assembles the three cell rows (CLIENT_ID / BUSINESS_ID / DEVICE_ID)
     * describing one record and emits them keyed by DEVICE_ID, as
     * {@code "<BUSINESS_ID> <CLIENT_ID>"}.
     */
    public static class TokenizerMapper extends Mapper<Object, Text, Text, Text> {

        /** Column where the id digits begin in each line (after timestamp and row key). */
        private static final int ID_OFFSET = 53;

        // Input lines arrive in fixed groups of three (CLIENT_ID, BUSINESS_ID,
        // DEVICE_ID); count tracks the position inside the current group.
        // NOTE(review): this static counter assumes a single mapper sees the
        // whole file in order — it breaks if the input is split across mappers
        // or lines arrive out of order. Confirm the job runs on one split.
        private static int count = 1;
        private String c_id;
        private String b_id;
        private String d_id;

        /** Collects every ASCII digit of {@code line} from index {@code from} onward. */
        private static String extractDigits(String line, int from) {
            StringBuilder digits = new StringBuilder();
            for (int i = from; i < line.length(); i++) {
                char ch = line.charAt(i);
                if (ch >= '0' && ch <= '9') {
                    digits.append(ch);
                }
            }
            return digits.toString();
        }

        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
            String id = extractDigits(value.toString(), ID_OFFSET);
            switch (count % 3) {
                case 1:   // CLIENT_ID row
                    c_id = id;
                    break;
                case 2:   // BUSINESS_ID row
                    b_id = id;
                    break;
                default:  // DEVICE_ID row completes the record
                    d_id = id;
                    context.write(new Text(d_id), new Text(b_id + " " + c_id));
                    break;
            }
            count++;
        }
    }

    /**
     * For each DEVICE_ID emits one line per record:
     * {@code "<sum of BUSINESS_ID over the device's records> <BUSINESS_ID + CLIENT_ID of this record>"}
     * — matching the expected-output table at the top of the file
     * (e.g. {@code 101 46 16}). The original code accumulated the sum in an
     * unused variable {@code all} and never wrote it; it is now emitted.
     */
    public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterable<Text> values,
                           Context context
                           ) throws IOException, InterruptedException {
            // The per-device BUSINESS_ID total must be known before any row is
            // written, and Hadoop's values iterable is single-pass, so buffer
            // string copies of the values first.
            List<String> rows = new ArrayList<String>();
            int businessSum = 0;
            for (Text val : values) {
                String row = val.toString();  // "<BUSINESS_ID> <CLIENT_ID>"
                rows.add(row);
                businessSum += Integer.parseInt(row.substring(0, row.indexOf(' ')));
            }
            for (String row : rows) {
                int space = row.indexOf(' ');
                int total = Integer.parseInt(row.substring(0, space))
                          + Integer.parseInt(row.substring(space + 1));
                context.write(new Text(key), new Text(businessSum + " " + total));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // No combiner: IntSumReducer's output ("sum total") is not in its own
        // input format ("business client"), so running it as a combiner would
        // corrupt the final result.
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
/*
获得数据 (resulting output):
table1
101 16
101 18
101 28
102 20
102 22
103 24
103 26
103 30
*/
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {

    /**
     * Emits the BUSINESS_ID of every record keyed by its DEVICE_ID; the
     * reducer then sums BUSINESS_IDs per device, producing the "table2"
     * output shown in the trailing comment (e.g. {@code 101 46}).
     */
    public static class TokenizerMapper extends Mapper<Object, Text, Text, Text> {

        /** Column where the id digits begin in each line (after timestamp and row key). */
        private static final int ID_OFFSET = 53;

        // Input lines arrive in fixed groups of three (CLIENT_ID, BUSINESS_ID,
        // DEVICE_ID); count tracks the position inside the current group.
        // NOTE(review): this static counter assumes a single mapper sees the
        // whole file in order — it breaks if the input is split across mappers.
        private static int count = 1;
        private String b_id;
        private String d_id;

        /** Collects every ASCII digit of {@code line} from index {@code from} onward. */
        private static String extractDigits(String line, int from) {
            StringBuilder digits = new StringBuilder();
            for (int i = from; i < line.length(); i++) {
                char ch = line.charAt(i);
                if (ch >= '0' && ch <= '9') {
                    digits.append(ch);
                }
            }
            return digits.toString();
        }

        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
            int slot = count % 3;
            if (slot == 2) {
                // BUSINESS_ID row: remember it until the record is complete.
                b_id = extractDigits(value.toString(), ID_OFFSET);
            } else if (slot == 0) {
                // DEVICE_ID row completes the record.
                d_id = extractDigits(value.toString(), ID_OFFSET);
                context.write(new Text(d_id), new Text(b_id));
            }
            // slot == 1 is the CLIENT_ID row, which this job does not use
            // (the original stored it in a never-read field; dropped).
            count++;
        }
    }

    /** Sums the BUSINESS_ID values collected for one DEVICE_ID. */
    public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterable<Text> values,
                           Context context
                           ) throws IOException, InterruptedException {
            int sum = 0;
            for (Text val : values) {
                sum += Integer.parseInt(val.toString());
            }
            context.write(key, new Text(String.valueOf(sum)));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // Reusing the reducer as a combiner is safe here: summation is
        // associative and the value format (a decimal string) is unchanged.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
/*
获得数据 (resulting output):
table2
101 46
102 31
103 55
*/