hadoop的filesplit

最新推荐文章于 2021-02-28 11:43:22 发布

鹅厂zone边旅游边写代码

最新推荐文章于 2021-02-28 11:43:22 发布

阅读量1k

点赞数

分类专栏： DB技术&&数仓技术文章标签： hadoop

DB技术&&数仓技术专栏收录该内容

179 篇文章 3 订阅

订阅专栏

hadoop的FileSplit简单使用

FileSplit类继承关系：

FileSplit类中的属性和方法：

作业输入：

[java]view plaincopy 
   
print?
 hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt  
 hadoop  a  
 spark   a  
 hive    a  
 hbase   a  
 tachyon a  
 storm   a  
 redis   a  
 hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt  
 hadoop  b  
 spark   b  
 kafka   b  
 tachyon b  
 oozie   b  
 flume   b  
 sqoop   b  
 solr    b  
 hadoop@hadoop:/home/hadoop/blb$   

代码：

[java]view plaincopy 
   
print?
 import java.io.IOException;  
   
   
 import org.apache.hadoop.conf.Configuration;  
 import org.apache.hadoop.fs.Path;  
 import org.apache.hadoop.io.LongWritable;  
 import org.apache.hadoop.io.NullWritable;  
 import org.apache.hadoop.io.Text;  
 import org.apache.hadoop.mapred.SplitLocationInfo;  
 import org.apache.hadoop.mapreduce.Job;  
 import org.apache.hadoop.mapreduce.Mapper;  
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;  
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;  
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;  
 import org.apache.hadoop.util.GenericOptionsParser;  
   
   
 public class GetSplitMapReduce {  
     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {  
         Configuration conf = new Configuration();  
         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();  
         if(otherArgs.length!=2){  
             System.err.println("Usage databaseV1 <inputpath> <outputpath>");  
         }  
   
   
         Job job = Job.getInstance(conf, GetSplitMapReduce.class.getSimpleName() + "1");  
         job.setJarByClass(GetSplitMapReduce.class);  
         job.setMapOutputKeyClass(Text.class);  
         job.setMapOutputValueClass(Text.class);  
         job.setOutputKeyClass(Text.class);  
         job.setOutputValueClass(NullWritable.class);  
         job.setMapperClass(MyMapper1.class);  
         job.setNumReduceTasks(0);  
         job.setInputFormatClass(TextInputFormat.class);  
         job.setOutputFormatClass(TextOutputFormat.class);  
         FileInputFormat.addInputPath(job, new Path(otherArgs[0]));  
         FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));  
         job.waitForCompletion(true);  
     }  
     public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable>{  
         @Override  
         protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)  
                 throws IOException, InterruptedException {  
           
             FileSplit fileSplit=(FileSplit) context.getInputSplit();  
             String pathname=fileSplit.getPath().getName();  //获取目录名字  
             int depth = fileSplit.getPath().depth();    //获取目录深度  
             Class<? extends FileSplit> class1 = fileSplit.getClass(); //获取当前类  
             long length = fileSplit.getLength();    //获取文件长度  
             SplitLocationInfo[] locationInfo = fileSplit.getLocationInfo(); //获取位置信息  
             String[] locations = fileSplit.getLocations();  //获取位置  
             long start = fileSplit.getStart(); //The position of the first byte in the file to process.  
             String string = fileSplit.toString();  
             //fileSplit.  
               
             context.write(new Text("===================================================================================="), NullWritable.get());  
             context.write(new Text("pathname--"+pathname), NullWritable.get());  
             context.write(new Text("depth--"+depth), NullWritable.get());  
             context.write(new Text("class1--"+class1), NullWritable.get());  
             context.write(new Text("length--"+length), NullWritable.get());  
             context.write(new Text("locationInfo--"+locationInfo), NullWritable.get());  
             context.write(new Text("locations--"+locations), NullWritable.get());  
             context.write(new Text("start--"+start), NullWritable.get());  
             context.write(new Text("string--"+string), NullWritable.get());  
         }  
     }  
 }  

对应inputpath2.txt文件的输出：

[java]view plaincopy 
   
print?
 hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00000  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@4ff41ba0  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@2341ce62  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@35549603  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@4444ba4f  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@7c23bb8c  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@dee2400  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@d7d8325  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
 ====================================================================================  
 pathname--inputpath2.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--66  
 locationInfo--null  
 locations--[Ljava.lang.String;@2b2cf90e  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  

对应inputpath1.txt文件的输出：

[java]view plaincopy 
   
print?
 hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00001  
 ====================================================================================  
 pathname--inputpath1.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--58  
 locationInfo--null  
 locations--[Ljava.lang.String;@4ff41ba0  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
 ====================================================================================  
 pathname--inputpath1.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--58  
 locationInfo--null  
 locations--[Ljava.lang.String;@2341ce62  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
 ====================================================================================  
 pathname--inputpath1.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--58  
 locationInfo--null  
 locations--[Ljava.lang.String;@35549603  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
 ====================================================================================  
 pathname--inputpath1.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--58  
 locationInfo--null  
 locations--[Ljava.lang.String;@4444ba4f  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
 ====================================================================================  
 pathname--inputpath1.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--58  
 locationInfo--null  
 locations--[Ljava.lang.String;@7c23bb8c  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
 ====================================================================================  
 pathname--inputpath1.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--58  
 locationInfo--null  
 locations--[Ljava.lang.String;@dee2400  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
 ====================================================================================  
 pathname--inputpath1.txt  
 depth--5  
 class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
 length--58  
 locationInfo--null  
 locations--[Ljava.lang.String;@d7d8325  
 start--0  
 string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
 hadoop@hadoop:/home/hadoop/blb$