6. Big Data Study Notes: HDFS Read/Write Code

1. Configure the Aliyun Maven mirror (faster updates from within China)

In the <mirrors> section of Maven's settings.xml, add:
<mirror>
    <id>alimaven</id>
    <mirrorOf>central</mirrorOf>
    <name>aliyun maven</name>
    <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</mirror>
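For context, a minimal sketch of the surrounding file (settings.xml normally lives at ~/.m2/settings.xml, or under conf/ in the Maven installation):

<settings>
    <mirrors>
        <mirror>
            <id>alimaven</id>
            <mirrorOf>central</mirrorOf>
            <name>aliyun maven</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
        </mirror>
    </mirrors>
</settings>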

2. Create a Maven project

Start IDEA and create a new Maven project.

Add the Hadoop/HDFS dependencies to pom.xml:

<properties>
    <hadoop.version>2.7.3</hadoop.version>
</properties>
<dependencies>
    <dependency>
        <groupId>commons-cli</groupId>
        <artifactId>commons-cli</artifactId>
        <version>1.2</version>
    </dependency>
    <dependency>
        <groupId>commons-logging</groupId>
        <artifactId>commons-logging</artifactId>
        <version>1.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        <version>${hadoop.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-app</artifactId>
        <version>${hadoop.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-hs</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>
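After saving the pom, let IDEA reimport the project, or verify from a terminal that all of the dependencies resolve through the mirror (a standard Maven goal, nothing project-specific assumed):

mvn dependency:resolve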

On Windows, add the cluster hostnames to the local hosts file at C:\Windows\System32\drivers\etc\hosts:
192.168.200.11 node1
192.168.200.12 node2
192.168.200.13 node3
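After saving the hosts file, a quick ping node1 from a command prompt should confirm the name resolves (assuming the virtual machines at those addresses are running).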

3. Copy the HDFS configuration files to the project's resources root directory (src/main/resources)
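In practice this usually means core-site.xml and hdfs-site.xml from the cluster, since the Hadoop client loads them from the classpath. A minimal sanity check that the configuration is being picked up (standard API; the class name here is my own):

import org.apache.hadoop.conf.Configuration;

public class ConfCheck {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Should print the namenode address from core-site.xml, e.g. hdfs://node1:9000
        System.out.println(conf.get("fs.defaultFS"));
    }
}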

4. Write the upload code

package hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.*;
import java.net.URI;

/**
 * Write a file from the local file system into HDFS via the Java API.
 */
public class FileCopyFromLocal {

    public static void main(String[] args) {
        String source = args[0];        // local source path
        String destination = args[1];   // HDFS destination path

        InputStream in = null;
        try {
            in = new BufferedInputStream(new FileInputStream(source));
            // Configuration for the HDFS client (loads core-site.xml etc. from the classpath)
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(URI.create(destination), conf);
            // FileSystem.create returns an FSDataOutputStream.
            // It does not support seeking: HDFS only allows sequential writes
            // or appends to an open file.
            OutputStream out = fs.create(new Path(destination));

            // The 'true' flag closes both streams once the copy finishes.
            IOUtils.copyBytes(in, out, 4096, true);

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Example arguments: local source /opt/bigdata/hadoop-2.7.3/etc/hadoop/kms-site.xml, HDFS destination hdfs://node1:9000/data/kms-site.xml.
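Put together, an end-to-end run might look like this (reusing the jar name from the packaging note in the next section; adjust it to your own artifact):

hadoop jar com.kaikeba.hadoop-1.0-SNAPSHOT.jar hdfs.FileCopyFromLocal /opt/bigdata/hadoop-2.7.3/etc/hadoop/kms-site.xml hdfs://node1:9000/data/kms-site.xml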

5. Write the code to download from HDFS to the local file system

package hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.BufferedOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

/**
 * Read a file from HDFS and copy it to the local file system.
 * Package and run the jar, e.g.:
 * [bruce@node-01 Desktop]$ hadoop jar com.kaikeba.hadoop-1.0-SNAPSHOT.jar hdfs.FileReadFromHdfs
 */
public class FileReadFromHdfs {

    public static void main(String[] args) {
        // source file in HDFS
        String srcFile = "hdfs://node1:9000/data/kms-site.xml";
        Configuration conf = new Configuration();
        try {
            FileSystem fs = FileSystem.get(URI.create(srcFile), conf);
            FSDataInputStream hdfsInStream = fs.open(new Path(srcFile));

            BufferedOutputStream outputStream =
                    new BufferedOutputStream(new FileOutputStream("/home/hadoop/Templates/kms-site.xml"));
            // The 'true' flag closes both streams once the copy finishes.
            IOUtils.copyBytes(hdfsInStream, outputStream, 4096, true);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

6. SequenceFile code

package sequenceFiles;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

import java.io.IOException;
import java.net.URI;

public class SequenceFileWriteNewVersion {
    private static final String[] DATA = {
            "One, two, buckle my shoe",
            "Three, four, shut the door",
            "Five, six, pick up sticks",
            "Seven, eight, lay them straight",
            "Nine, ten, a big fat hen"
    };

    public static void main(String[] args) {

        // output path (used both as the FileSystem URI and the file path)
        String uri = args[0];
        Configuration conf = new Configuration();
        try {
            FileSystem fs = FileSystem.get(URI.create(uri), conf);
            Path path = new Path(uri);

            IntWritable key = new IntWritable();
            Text value = new Text();

            // Writer options: target file, key/value classes, block compression
            SequenceFile.Writer.Option pathOption     = SequenceFile.Writer.file(path);
            SequenceFile.Writer.Option keyOption      = SequenceFile.Writer.keyClass(IntWritable.class);
            SequenceFile.Writer.Option valueOption    = SequenceFile.Writer.valueClass(Text.class);
            SequenceFile.Writer.Option compressOption = SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK);

            SequenceFile.Writer writer = SequenceFile.createWriter(conf, pathOption, keyOption, valueOption, compressOption);

            for (int i = 0; i < 100; i++) {
                key.set(100 - i);
                value.set(DATA[i % DATA.length]);
                // getLength() reports the current position in the output file
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
                writer.append(key, value);
            }
            IOUtils.closeStream(writer);

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
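For completeness, a minimal companion sketch that reads the file back (same Hadoop 2.7.3 SequenceFile API; the class name here is my own):

package sequenceFiles;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class SequenceFileReadNewVersion {

    public static void main(String[] args) {
        // path of the SequenceFile to read, e.g. the output of the writer above
        String uri = args[0];
        Configuration conf = new Configuration();
        SequenceFile.Reader reader = null;
        try {
            // Reader.file() mirrors the Writer.file() option used by the writer
            reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(uri)));
            IntWritable key = new IntWritable();
            Text value = new Text();
            // next() fills key/value and returns false at end of file
            while (reader.next(key, value)) {
                System.out.printf("%s\t%s\n", key, value);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}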

Reposted from: https://juejin.im/post/5d2d93baf265da1b71532856
