HNU大数据并行处理系统(三)hdfs任务

使用 Java API 操作 HDFS,实现以下功能:

package lab1.task4;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class STjoin {
    public static int time = 0; public static class Map extends Mapper<Object, Text, Text, Text>{
        public void map(Object key,Text value,Context context)throws IOException,InterruptedException{
            String relationtype = new String();
            String line = value.toString();
            System.out.println("mapper...............");
            int i = 0;
            //遍历方法二:使用迭代器取出child和parent
            String[] values = new String[10];
            StringTokenizer itr = new StringTokenizer(line);
            while(itr.hasMoreTokens()){
                values[i] = itr.nextToken();
                i = i+1;
            }

            System.out.println("child:"+values[0]+"  parent:"+values[1]);
            if(values[0].compareTo("child") != 0){//如果是child,则为0,否则为-1

                relationtype="1";
                context.write(new Text(values[1]),new Text(relationtype+"+"+values[0]));
                System.out.println("key:"+values[1]+"  value: "+relationtype+"+"+values[0]);
                relationtype = "2";
                context.write(new Text(values[0]), new Text(relationtype+"+"+values[1]));
                System.out.println("key:"+values[0]+"  value: "+relationtype+"+"+values[1]);
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text>{
        public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
            System.out.println("reduce.....................");
            System.out.println("key:"+key+"  values:"+values);
            if(time==0){
                context.write(new Text("grandchild"), new Text("grandparent"));
                time++;
            }
            int grandchildnum = 0;
            String grandchild[] = new String[10];
            int grandparentnum = 0;
            String grandparent[] = new String[10];

            String name = new String();
            //遍历方法二:用for循环
            for(Text val : values){
                //    String record = ite.next().toString();
                String record = val.toString();
                System.out.println("record: "+record);

                int i = 2;
                char relationtype = record.charAt(0);
                name = record.substring(i);

                System.out.println("name: "+name);

                if (relationtype=='1') {
                    grandchild[grandchildnum] = name;
                    grandchildnum++;
                }
                else{
                    grandparent[grandparentnum]=name;
                    grandparentnum++;
                }
            }
            //遍历方法三:就是详细方法的charAt(),一个一个字符遍历
            if(grandparentnum!=0&&grandchildnum!=0){
                for(int m = 0 ; m < grandchildnum ; m++){
                    for(int n = 0 ; n < grandparentnum; n++){
                        context.write(new Text(grandchild[m]), new Text(grandparent[n]));
                        System.out.println("grandchild: "+grandchild[m]+"  grandparent: "+grandparent[n]);
                    }
                }
            }
        }
    }
    public static void main(String [] args)throws Exception{
        Configuration conf = new Configuration();
        Job job = new Job(conf,"single table join");
        job.setJarByClass(STjoin.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/input4/"));
        FileOutputFormat.setOutputPath(job,new Path("hdfs://master:9000/output4/"));

        System.exit(job.waitForCompletion(true)? 0 : 1);
    }
}

在这里插入图片描述
1、 在 HDFS 上创建文件

package lab3.task1;

/**
 * Task 1: asks {@code HdfsUtil} to create a directory on the cluster.
 */
public class work1 {
    /** Location passed to {@code HdfsUtil.mkdir}. */
    private static final String TARGET_DIR = "hdfs://master:9000/lab3/task2/";

    /**
     * Creates the target directory and then reports success.
     * The message is printed whenever {@code mkdir} returns normally; its
     * result, if any, is not inspected.
     */
    public static void main(String[] args) {
        // presumably "hadoop" is the user name the helper connects as — confirm against HdfsUtil
        final HdfsUtil client = new HdfsUtil("hadoop");
        client.mkdir(TARGET_DIR);
        System.out.println("success!");
    }
}

2、 删除 HDFS 上的文件

package lab3.task1;

/**
 * Task 2: asks {@code HdfsUtil} to delete a path on the cluster.
 */
public class work2 {
    /** Location passed to {@code HdfsUtil.delete}. */
    private static final String TARGET_DIR = "hdfs://master:9000/lab3/task2/";

    /**
     * Deletes the target path and then reports success.
     * The message is printed whenever {@code delete} returns normally; its
     * result, if any, is not inspected.
     */
    public static void main(String[] args) {
        // presumably "hadoop" is the user name the helper connects as — confirm against HdfsUtil
        final HdfsUtil client = new HdfsUtil("hadoop");
        // NOTE(review): the second argument looks like a "recursive" flag — confirm against HdfsUtil
        final boolean flag = true;
        client.delete(TARGET_DIR, flag);
        System.out.println("success!");
    }
}

3、 上传文件至 HDFS

package lab3.task1;
/**
 * Task 3: asks {@code HdfsUtil} to upload a local file to the cluster.
 */
public class work3 {
    /** Local file handed to {@code HdfsUtil.upload} as its first argument. */
    private static final String LOCAL_FILE = "C:\\Users\\dell\\Desktop\\3.txt";

    /** Remote location handed to {@code HdfsUtil.upload} as its second argument. */
    private static final String REMOTE_FILE = "hdfs://master:9000/lab3/task1/3.txt";

    /**
     * Uploads the local file and then reports success.
     * The message is printed whenever {@code upload} returns normally; its
     * result, if any, is not inspected.
     */
    public static void main(String[] args) {
        // presumably "hadoop" is the user name the helper connects as — confirm against HdfsUtil
        final HdfsUtil client = new HdfsUtil("hadoop");
        client.upload(LOCAL_FILE, REMOTE_FILE);
        System.out.println("success!");
    }
}

4、 读取文件内容

package lab3.task1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

/**
 * Task 4: prints the content of a file stored on the cluster.
 */
public class work4 {
    /**
     * Reads a remote file line by line and echoes it to standard output.
     * Each line is split on single spaces and every resulting token is printed
     * followed by one space; the line is then terminated with a space and a
     * newline.
     *
     * @param conf           configuration used to obtain the file system
     * @param remoteFilePath path of the file to read
     * @throws IOException if the file system cannot be obtained or the file
     *                     cannot be opened or read
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        Path remotePath = new Path(remoteFilePath);
        // try-with-resources releases the reader, the stream and the file system
        // (in that order) even when opening or reading fails part-way through.
        // NOTE(review): FileSystem.get may hand back a shared cached instance, so
        // closing it is only safe because this is a stand-alone program — confirm
        // before reusing cat() inside a longer-lived process.
        // NOTE(review): the reader uses the platform default charset, as before;
        // confirm the file's encoding if non-ASCII text is expected.
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(remotePath);
             BufferedReader d = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = d.readLine()) != null) {
                String[] strarray = line.split(" ");
                for (int i = 0; i < strarray.length; i++) {
                    System.out.print(strarray[i]);
                    System.out.print(" ");
                }
                System.out.println(" ");
            }
        }
    }

    /**
     * Entry point: points the configuration at the cluster, prints the fixed
     * remote file, and dumps the stack trace of any failure.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // NOTE(review): "fs.default.name" is the legacy key name; newer Hadoop
        // releases document "fs.defaultFS" — left unchanged because the cluster
        // version is not visible here.
        conf.set("fs.default.name", "hdfs://master:9000");
        String remoteFilePath = "hdfs://master:9000/lab3/task1/3.txt"; // HDFS path

        try {
            System.out.println("读取文件: " + remoteFilePath);
            work4.cat(conf, remoteFilePath);
            System.out.println("\n读取完成");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值