<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.cc.pxj.wfy</groupId>
  <artifactId>phoneWcRuoZe</artifactId>
  <version>1.0-SNAPSHOT</version>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <hadoop.version>2.6.0-cdh5.16.2</hadoop.version>
  </properties>
  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>
  <dependencies>
    <!-- Add the Hadoop dependency -->
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/junit/junit -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.17</version>
    </dependency>
  </dependencies>
  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
Java code
package com.ccj.pxj.homework.distinct;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class DistinctMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Emit the whole line as the key; duplicate lines collapse into a
        // single group during the shuffle, which performs the deduplication.
        context.write(value, NullWritable.get());
    }
}
package com.ccj.pxj.homework.distinct;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class DistinctReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Each distinct line reaches the reducer exactly once as a key;
        // write it out once and ignore the values.
        context.write(key, NullWritable.get());
    }
}
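Because the reducer ignores the values and simply re-emits each key once, it is idempotent, so it could also be registered as a combiner to deduplicate on the map side and shrink the shuffle. This is an optional tweak, not part of the original driver below; the single extra call would go in run():

    // Optional, not in the original driver: map-side deduplication.
    // Safe because DistinctReducer only re-emits keys and ignores values.
    job.setCombinerClass(DistinctReducer.class);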
package com.ccj.pxj.homework.distinct;

import com.ccj.pxj.phone.utils.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class DistinctDriver implements Tool {
    private Configuration conf;

    @Override
    public int run(String[] args) throws Exception {
        String outPath = "wc/distinct/out";
        String inputPath = "data/distinct.txt";
        // 1. Get the Job object
        Job job = Job.getInstance(getConf());
        // 2. Set the main class
        job.setJarByClass(DistinctDriver.class);
        // 3. Set the Mapper class
        job.setMapperClass(DistinctMapper.class);
        // 4. Set the Reducer class
        job.setReducerClass(DistinctReducer.class);
        // 5. Set the map and final output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // 6. Delete any previous output, then set the input path
        FileUtils.deleteOutput(conf, outPath);
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        // 7. Set the output path
        FileOutputFormat.setOutputPath(job, new Path(outPath));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    @Override
    public Configuration getConf() {
        return this.conf;
    }

    public static void main(String[] args) throws Exception {
        int resultCode = ToolRunner.run(new DistinctDriver(), args);
        if (resultCode == 0) {
            System.out.println("Job succeeded!");
        } else {
            System.out.println("Job failed!");
        }
    }
}
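The driver calls FileUtils.deleteOutput, a project-local helper (com.ccj.pxj.phone.utils.FileUtils) whose source is not included in this post. A minimal sketch of what such a helper presumably does, deleting the output directory if it already exists so the job can be rerun, using the standard Hadoop FileSystem API; everything beyond the class and method name taken from the import is an assumption:

package com.ccj.pxj.phone.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class FileUtils {
    // Sketch: delete the output path recursively if it exists, so reruns
    // do not fail with FileAlreadyExistsException. The author's actual
    // helper is not shown in the post.
    public static void deleteOutput(Configuration conf, String output) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(output);
        if (fs.exists(path)) {
            fs.delete(path, true); // true = delete recursively
        }
    }
}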
Data
pxj pxj pxj
pxj pxj pxj
ccj pxj wfy
ccj ccj pxj
pxj wfy pxj
pxj pxj ccj
pxj wfy wlp
pxj wfy pxj
wxc ccj jpeson
pxj pxj wfy
pxj wlp wfy
pxj pxj wlp
pxj pxj wfy
pxj pxj pxj
pxj wxc pxj
pxj pxj ccj
pk wxc pxj
pxj pxj ccj
pxj pxj zcl
wlp pxj lzh
pxj wfy wxc
pxj pxj pxj
wlp pxj wxc
ccj lzh pxj
pxj pxj pxj
pxj wfy pxj
ccj pxj wfy
pxj pxj lzh
pxj pxj ccj
wfy wfy ccj
Result
ccj ccj pxj
ccj lzh pxj
ccj pxj wfy
pk wxc pxj
pxj pxj ccj
pxj pxj lzh
pxj pxj pxj
pxj pxj wfy
pxj pxj wlp
pxj pxj zcl
pxj wfy pxj
pxj wfy wlp
pxj wfy wxc
pxj wlp wfy
pxj wxc pxj
wfy wfy ccj
wlp pxj lzh
wlp pxj wxc
wxc ccj jpeson
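As a local sanity check (not part of the original job), the same result can be reproduced with a sorted set, since the job's output is exactly the unique input lines in sorted key order. A small standalone example, assuming the input file path used by the driver:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.TreeSet;

public class DistinctCheck {
    public static void main(String[] args) throws IOException {
        // TreeSet keeps one copy of each line in sorted order, mirroring
        // what the shuffle does for the MapReduce distinct job above.
        TreeSet<String> unique = new TreeSet<>(Files.readAllLines(Paths.get("data/distinct.txt")));
        unique.forEach(System.out::println);
    }
}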
Author: pxj (潘陈)