Use the Java API to operate on HDFS and implement the following functions:
package lab1.task4;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class STjoin {
    // Shared counter so the output header is written only once per reducer
    public static int time = 0;

    public static class Map extends Mapper<Object, Text, Text, Text> {
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            System.out.println("mapper...............");
            // Tokenize the line into its child and parent fields
            String[] values = new String[10];
            int i = 0;
            StringTokenizer itr = new StringTokenizer(line);
            while (itr.hasMoreTokens()) {
                values[i] = itr.nextToken();
                i++;
            }
            System.out.println("child:" + values[0] + " parent:" + values[1]);
            // Skip the header row: compareTo returns 0 only when the first field equals "child"
            if (values[0].compareTo("child") != 0) {
                // Tag "1": keyed by parent, the value names one of the key's children
                String relationtype = "1";
                context.write(new Text(values[1]), new Text(relationtype + "+" + values[0]));
                System.out.println("key:" + values[1] + " value: " + relationtype + "+" + values[0]);
                // Tag "2": keyed by child, the value names one of the key's parents
                relationtype = "2";
                context.write(new Text(values[0]), new Text(relationtype + "+" + values[1]));
                System.out.println("key:" + values[0] + " value: " + relationtype + "+" + values[1]);
            }
        }
    }
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            System.out.println("reduce.....................");
            System.out.println("key:" + key + " values:" + values);
            // Emit the table header once, before any join results
            if (time == 0) {
                context.write(new Text("grandchild"), new Text("grandparent"));
                time++;
            }
            int grandchildnum = 0;
            String[] grandchild = new String[10];
            int grandparentnum = 0;
            String[] grandparent = new String[10];
            String name;
            // Each value looks like "1+name" or "2+name": tag at index 0, name from index 2
            for (Text val : values) {
                String record = val.toString();
                System.out.println("record: " + record);
                char relationtype = record.charAt(0);
                name = record.substring(2);
                System.out.println("name: " + name);
                if (relationtype == '1') {
                    // A child of the key, i.e. a grandchild candidate
                    grandchild[grandchildnum] = name;
                    grandchildnum++;
                } else {
                    // A parent of the key, i.e. a grandparent candidate
                    grandparent[grandparentnum] = name;
                    grandparentnum++;
                }
            }
            // Cross every grandchild with every grandparent found under this key
            if (grandparentnum != 0 && grandchildnum != 0) {
                for (int m = 0; m < grandchildnum; m++) {
                    for (int n = 0; n < grandparentnum; n++) {
                        context.write(new Text(grandchild[m]), new Text(grandparent[n]));
                        System.out.println("grandchild: " + grandchild[m] + " grandparent: " + grandparent[n]);
                    }
                }
            }
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor
        Job job = Job.getInstance(conf, "single table join");
        job.setJarByClass(STjoin.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/input4/"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/output4/"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
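To make the join concrete, suppose the input under /input4/ is a hypothetical child-parent table like this (data invented for illustration):

child parent
Tom Lucy
Lucy Mary

The mapper skips the header and emits (Lucy, "1+Tom"), (Tom, "2+Lucy"), (Mary, "1+Lucy") and (Lucy, "2+Mary"). The reducer invoked for key Lucy then holds one grandchild candidate (Tom, tag 1) and one grandparent candidate (Mary, tag 2), so the job writes the pair Tom / Mary under the grandchild-grandparent header.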
1. Create a directory on HDFS (tasks 1-3 rely on the HdfsUtil helper class; a sketch of it follows task 3)
package lab3.task1;

public class work1 {
    public static void main(String[] args) {
        // "hadoop" is the HDFS user name the helper connects as
        HdfsUtil hdfs1 = new HdfsUtil("hadoop");
        hdfs1.mkdir("hdfs://master:9000/lab3/task2/");
        System.out.println("success!");
    }
}
2. Delete a file or directory on HDFS
package lab3.task1;

public class work2 {
    public static void main(String[] args) {
        HdfsUtil hdfs1 = new HdfsUtil("hadoop");
        // true enables recursive deletion, so a non-empty directory is removed too
        hdfs1.delete("hdfs://master:9000/lab3/task2/", true);
        System.out.println("success!");
    }
}
3. Upload a local file to HDFS
package lab3.task1;

public class work3 {
    public static void main(String[] args) {
        HdfsUtil hdfs1 = new HdfsUtil("hadoop");
        // Copy a local Windows file to the given HDFS path
        hdfs1.upload("C:\\Users\\dell\\Desktop\\3.txt", "hdfs://master:9000/lab3/task1/3.txt");
        System.out.println("success!");
    }
}
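The HdfsUtil class itself is not part of this listing. Below is a minimal sketch of what it might look like, assuming the constructor argument is the HDFS user name and that each method simply wraps the matching org.apache.hadoop.fs.FileSystem call; the method signatures are inferred from the calls in tasks 1-3, not taken from the original source.

package lab3.task1;

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsUtil {
    private final String user; // HDFS user name, e.g. "hadoop"

    public HdfsUtil(String user) {
        this.user = user;
    }

    // Obtain a FileSystem handle for the cluster addressed by the path, acting as the given user
    private FileSystem getFs(String path) throws Exception {
        return FileSystem.get(URI.create(path), new Configuration(), user);
    }

    public void mkdir(String path) {
        try (FileSystem fs = getFs(path)) {
            fs.mkdirs(new Path(path));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void delete(String path, boolean recursive) {
        try (FileSystem fs = getFs(path)) {
            fs.delete(new Path(path), recursive);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void upload(String localPath, String remotePath) {
        try (FileSystem fs = getFs(remotePath)) {
            fs.copyFromLocalFile(new Path(localPath), new Path(remotePath));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Swallowing exceptions with printStackTrace keeps the sketch close to the fire-and-forget style of the work1-work3 drivers; production code would propagate or log them instead.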
4. Read the contents of a file on HDFS
package lab3.task1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class work4 {
    /**
     * Read and print the contents of an HDFS file.
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader d = new BufferedReader(new InputStreamReader(in));
        String line = null;
        while ((line = d.readLine()) != null) {
            // Split each line on spaces and re-print the tokens, normalizing the spacing
            String[] strarray = line.split(" ");
            for (int i = 0; i < strarray.length; i++) {
                System.out.print(strarray[i]);
                System.out.print(" ");
            }
            System.out.println(" ");
        }
        d.close();
        in.close();
        fs.close();
    }
    /**
     * Main entry point.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // fs.defaultFS is the current name of the deprecated fs.default.name key
        conf.set("fs.defaultFS", "hdfs://master:9000");
        String remoteFilePath = "hdfs://master:9000/lab3/task1/3.txt"; // HDFS path
        try {
            System.out.println("Reading file: " + remoteFilePath);
            work4.cat(conf, remoteFilePath);
            System.out.println("\nDone reading");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
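For comparison, Hadoop ships an IOUtils helper that streams a file straight to stdout without any line splitting. A minimal sketch under the same cluster address and file path (the class name work4b is hypothetical, used for illustration only):

package lab3.task1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class work4b {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        FileSystem fs = FileSystem.get(conf);
        try (FSDataInputStream in = fs.open(new Path("/lab3/task1/3.txt"))) {
            // Copy the stream to stdout in 4 KB chunks; 'false' leaves System.out open
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            fs.close();
        }
    }
}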