java mapreduce入门实例-实现按某个字段排序

1、新建一个空的 Maven 工程

pom.xml 文件如下:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!-- Build descriptor for the MapReduce demo: compiles the job and packages it as a
       self-contained "jar-with-dependencies" whose entry point is easoumapreducedemo.WordCount. -->
  <modelVersion>4.0.0</modelVersion>

  <groupId>easoumapreducedemo</groupId>
  <artifactId>easoumapreducedemo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>easoumapreducedemo</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- Hadoop client libraries; all three are pinned to the same release. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.6.0</version>
    </dependency>
    <!-- "provided" scope: available at compile time but not bundled into the assembly,
         so it must be on the classpath of the environment that runs the job. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
      <scope>provided</scope>
    </dependency>

    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile for Java 7 source and bytecode level. -->
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>
      <!-- Build the fat jar during the "package" phase. -->
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <!-- Must match the fully qualified name of the job's driver class. -->
              <mainClass>easoumapreducedemo.WordCount</mainClass>
            </manifest>
          </archive>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

2、java 文件如下 

package easoumapreducedemo;

import java.io.IOException;
import java.util.Iterator;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that orders input lines by one of their fields.
 *
 * <p>The mapper emits {@code (sortField, line)} pairs. The shuffle phase sorts the pairs by
 * key before they reach a reducer, and the reducer writes the lines back out in that order.
 *
 * <p>Ordering caveats:
 * <ul>
 *   <li>Keys are {@link Text}, so they are compared as raw bytes, not as dates or numbers.</li>
 *   <li>The job runs several reduce tasks with the default partitioner, so each output file is
 *       sorted on its own but the files are not ordered relative to each other. Use a single
 *       reduce task (or a total-order partitioner) if one globally sorted output is needed.</li>
 * </ul>
 */
public class WordCount
{
    /**
     * Regex matching the field separator, the single control character U+0001.
     * NOTE(review): presumably Hive's default field delimiter, given the input path - confirm.
     */
    private static final String FIELD_DELIMITER_REGEX = "\\u0001";

    /**
     * Zero-based index of the field used as the sort key.
     * NOTE(review): the original code referenced an undefined {@code dateStr} variable, so the
     * intended field is unknown; the first field is assumed here - adjust to the real column.
     */
    private static final int SORT_FIELD_INDEX = 0;

    /**
     * Configures and submits the sort job, then exits with 0 on success and 1 on failure.
     *
     * <p>Any existing output directory is deleted recursively before submission, because the
     * job refuses to start when its output path already exists.
     *
     * @param args command-line arguments (currently unused)
     * @throws IOException if the file system or the job submission fails
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException if the wait for job completion is interrupted
     */
    public static void main( String[] args ) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException
    {
        // Job-wide parameter; could also be taken from args.
        String date1 = "";

        Configuration conf = new Configuration();
        // Publish the parameter through the job configuration so tasks can read it with
        // context.getConfiguration().get("date1"). Nothing in this class reads it yet.
        conf.set("date1", date1);
        // Cluster endpoints: the keys are fixed, the values depend on the environment.
        conf.set("fs.defaultFS", "hdfs://nameservice1:8888");
        conf.set("yarn.resourcemanager.hostname", "master005");
        // Reducer memory: JVM heap and container size.
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m");
        conf.set("mapreduce.reduce.memory.mb", "4096");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        // These must match the generic parameters of the mapper and reducer below.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(10);

        // Input directory; call addInputPath once per directory to add more.
        FileInputFormat.addInputPath(job, new Path("/user/hive/输入文件目录"));

        // Output directory.
        Path outPath = new Path("/user/output/输出目录");
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outPath)) {
            // Explicit recursive delete replaces the deprecated single-argument overload.
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        boolean flag = job.waitForCompletion(true);
        if (flag) {
            System.out.println("job success");
        } else {
            System.out.println("job fail");
        }
        System.exit(flag ? 0 : 1);
    }

    /**
     * Emits each input line keyed by its sort field so the shuffle phase orders the lines.
     */
    public static class WordCountMap extends Mapper<Object, Text, Text, Text> {
        /** Reused across calls to avoid allocating a new key object per record. */
        private final Text keyText = new Text();

        /**
         * Splits the line on the field delimiter and writes {@code (sortField, line)}.
         * Lines that do not contain the sort field are counted and skipped.
         *
         * @param key input key supplied by the input format (unused)
         * @param value one line of input
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            // Limit -1 keeps trailing empty fields so column positions stay stable.
            String[] fields = line.split(FIELD_DELIMITER_REGEX, -1);
            if (fields.length <= SORT_FIELD_INDEX) {
                context.getCounter("WordCount", "MALFORMED_LINES").increment(1);
                return;
            }

            keyText.set(fields[SORT_FIELD_INDEX]);
            context.write(keyText, value);
        }
    }

    /**
     * Writes every line it receives as the output key, with no value, so the output files
     * contain the original lines in the order the keys arrive.
     */
    public static class WordCountReduce extends Reducer<Text, Text, Text, NullWritable> {
        /**
         * Writes out all lines that share one sort key.
         *
         * @param key the sort field value shared by these lines
         * @param values the lines carrying that sort field value
         * @param context task context used to emit output
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text line : values) {
                context.write(line, NullWritable.get());
            }
        }
    }
}

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值