Demo: reading a file from Hadoop (HDFS) with Java

What is it?

      Use Java to read a single file stored in Hadoop's HDFS.

 

Why?

     A first hands-on exercise for getting started with Hadoop development.

 

How?

1. Dependency jars (pom.xml)

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>myHadoop</groupId>
    <artifactId>myHadoop</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <hadoop.hdfs.version>2.4.0</hadoop.hdfs.version>
        <hadoop.mapreduce.client.core.version>2.4.0</hadoop.mapreduce.client.core.version>
        <hadoop.common.version>2.4.0</hadoop.common.version>
        <hadoop.mapreduce.client.common.version>2.4.0</hadoop.mapreduce.client.common.version>
        <hadoop.mapreduce.client.jobclient.version>2.4.0</hadoop.mapreduce.client.jobclient.version>
        <commons.lang.version>3.3.2</commons.lang.version>
        <commons.io.version>2.4</commons.io.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.hdfs.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.mapreduce.client.core.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.common.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>${hadoop.mapreduce.client.common.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>${hadoop.mapreduce.client.jobclient.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>${commons.lang.version}</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>${commons.io.version}</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>gQuery-${project.version}</finalName>
        <!-- <resources> <resource> <directory>src/main/resources</directory> <filtering>true</filtering>
            <excludes> <exclude>*</exclude> <exclude>*/*</exclude> </excludes> </resource>
            </resources> -->
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.1</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                    <encoding>utf8</encoding>
                </configuration>
            </plugin>
            <!-- The Jar plugin provides the goals for building the jar file -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>2.4</version>
                <!-- The configuration of the plugin -->
                <configuration>
                    <!-- Configuration of the archiver -->
                    <archive>
                        <!-- Do not include pom.xml and pom.properties in the generated jar -->
                        <addMavenDescriptor>false</addMavenDescriptor>

                        <!-- Manifest specific configuration -->
                        <manifest>
                            <!-- Add the third-party jars to the Class-Path entry of the manifest -->
                            <addClasspath>true</addClasspath>
                            <!-- Prefix for the Class-Path entries; the third-party jars live under lib/, so the prefix is lib/ -->
                            <classpathPrefix>lib/</classpathPrefix>
                            <!-- Main class of the application -->
                            <mainClass>com.geotmt.hadoop.App</mainClass>
                        </manifest>
                        <!-- Add the current directory (.) to the Class-Path in MANIFEST.MF -->
                        <manifestEntries>
                            <Class-Path>.</Class-Path>
                        </manifestEntries>
                    </archive>
                    <!-- Patterns to exclude from the jar, e.g. **/*.xml -->
                    <excludes>
                        <exclude>**/*.properties</exclude>
                        <exclude>**/*.xml</exclude>
                    </excludes>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>3.1.0</version>
                <executions>
                    <execution>
                        <id>copy-resources</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-resources</goal>
                        </goals>
                        <configuration>
                            <encoding>UTF-8</encoding>
                            <!-- Directory the resources are copied to -->
                            <outputDirectory>${project.build.directory}/etc</outputDirectory>
                            <resources>
                                <!-- Resource sets to copy -->
                                <resource>
                                    <!-- Directory containing the resources, relative to the POM -->
                                    <directory>src/main/resources/</directory>
                                    <!-- Patterns to include in the copy, e.g. **/*.xml -->
                                    <includes>
                                        <include>**/*.properties</include>
                                        <include>**/*.xml</include>
                                    </includes>
                                    <!-- Whether to filter the resources, replacing ${...} placeholders with values from the properties element or from the files listed under filters -->
                                    <filtering>true</filtering>
                                </resource>
                            </resources>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <!-- Copy the dependency jars into the lib directory -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <!-- Executions of plugin goals within the build lifecycle; each execution can have its own configuration -->
                <executions>
                    <!-- An execution element holds the information needed for one plugin execution -->
                    <execution>
                        <!-- Identifier for this execution, used to reference it during the build or to merge it during inheritance -->
                        <id>copy-dependencies</id>
                        <!-- Lifecycle phase the goals are bound to; if omitted, the goals' default phase is used -->
                        <phase>package</phase>
                        <!-- Goals to execute -->
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <!-- Directory the dependency jars are copied to -->
                            <outputDirectory>
                                ${project.build.directory}/etc/lib
                            </outputDirectory>
                            <overWriteReleases>false</overWriteReleases>
                            <overWriteSnapshots>false</overWriteSnapshots>
                            <overWriteIfNewer>true</overWriteIfNewer>
                            <excludeArtifactIds>junit</excludeArtifactIds>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
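
With this pom, running mvn package builds the application jar in target/ (named from finalName), copies the filtered .properties and .xml resources to target/etc, and copies the third-party dependency jars (junit excluded) to target/etc/lib. Because the manifest Class-Path entries use the lib/ prefix, the jar expects those dependencies to sit in a lib directory next to it when it is deployed.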

2. Java code

package com.geotmt.hadoop.hdfs;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.InputStream;
import java.net.URI;

/**
 * Read a file from HDFS and print its contents to standard output.
 *
 * Created by c.z on 2018/8/10.
 */
public class ReadFileHandle {

    public static void main(String[] args) throws Exception {
        ReadFileHandle readFileHandle = new ReadFileHandle();
        readFileHandle.readFile();
    }
    
    public void readFile() throws Exception {
        // HDFS URI of the file to read: NameNode address plus the path of the file.
        String uri = "hdfs://10.111.32.142:8020/user/zhaochao/20180727/sms.1532681090912.log.tmp";
        Configuration conf = new Configuration();
        // Obtain a FileSystem handle for the cluster identified by the URI.
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        // Open the file and copy its bytes to standard output;
        // try-with-resources closes the input stream.
        try (InputStream in = fs.open(new Path(uri))) {
            IOUtils.copy(in, System.out);
        }
    }
}
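
For comparison, Hadoop ships its own stream utility in org.apache.hadoop.io.IOUtils, so the same read works without commons-io. The sketch below is only an illustration: the class name ReadFileWithHadoopIOUtils is made up here, and the URI is the same placeholder path used above, to be replaced with your own NameNode address and file.

package com.geotmt.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.net.URI;

/**
 * Variant of the read above using Hadoop's own IOUtils instead of commons-io.
 * Hypothetical example class; adjust the URI to your own cluster and file.
 */
public class ReadFileWithHadoopIOUtils {

    public static void main(String[] args) throws Exception {
        String uri = "hdfs://10.111.32.142:8020/user/zhaochao/20180727/sms.1532681090912.log.tmp";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        // fs.open returns an FSDataInputStream, which also supports seek().
        try (FSDataInputStream in = fs.open(new Path(uri))) {
            // copyBytes(in, out, bufferSize, close): false means System.out stays open.
            IOUtils.copyBytes(in, System.out, 4096, false);
        }
    }
}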

 
