Using Java to query Hive data and upload it to HDFS, with Maven packaging of a local jar dependency (source code included)


Background

A big-data platform needs a runnable executable jar. The requirement: the jar queries a Hive table and uploads the result to HDFS.

Components

JDK 8 + Hive + HDFS

Source code

https://gitee.com/acelee723/acelee-hive-hdfs-main-jar

Code

1. Hive client class

import org.mortbay.util.ajax.JSON;

import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Hive client utility class
 *
 * @author Ace Lee
 * @date 2019/8/27 14:18
 * @version 1.0
 **/
public class HiveClientUtils {

    private static String driverName = "com.cloudera.hive.jdbc4.HS2Driver";

    //HiveServer2 JDBC URL: the Hive host/port and database configured for the cluster
    private static String Url = "jdbc:hive2://10.10.10.10:10000/hbzfw";

    private static Connection conn;

    private static PreparedStatement ps;

    private static ResultSet rs;

    //create a JDBC connection to HiveServer2
    public static Connection getConnnection() {
        try {
            Class.forName(driverName);
            //the user supplied here must have permission to operate on HDFS,
            //otherwise the program will fail with a "permission denied" exception
            conn = DriverManager.getConnection(Url, "", "");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    public static PreparedStatement prepare(Connection conn, String sql) {
        PreparedStatement ps = null;
        try {
            ps = conn.prepareStatement(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return ps;
    }

    public static List<Map<String,Object>> getResult(String sql) {
        List<Map<String,Object>> rowDatas = new ArrayList<Map<String, Object>>();
        System.out.println(sql);
        conn = getConnnection();

        try {
            ps = prepare(conn, sql);
            rs = ps.executeQuery();
            ResultSetMetaData md = rs.getMetaData();
            int columnCount = md.getColumnCount();

            //map each row to <columnName, value>
            while (rs.next()) {
                Map<String,Object> rowData = new HashMap<String, Object>();
                for (int i = 1; i <= columnCount; i++) {
                    rowData.put(md.getColumnName(i), rs.getObject(i));
                }
                rowDatas.add(rowData);
            }
            System.out.println(JSON.toString(rowDatas));
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            //release JDBC resources
            try {
                if (rs != null) rs.close();
                if (ps != null) ps.close();
                if (conn != null) conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        return rowDatas;
    }

}
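
A quick way to exercise this class on its own is a small standalone main method. This is only a sketch: the query below reuses the example table from the main class in section 3 and must exist in your Hive database.

import java.util.List;
import java.util.Map;

public class HiveClientUtilsDemo {

    public static void main(String[] args) {
        //placeholder query; replace the table with one that exists in your database
        List<Map<String, Object>> rows =
                HiveClientUtils.getResult("select name,id_card from hbzfw.t_user limit 10");
        for (Map<String, Object> row : rows) {
            System.out.println(row);
        }
    }
}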

2. HDFS client class

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.*;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * HDFS client utility class
 *
 * @author Ace Lee
 * @date 2019/8/27 14:18
 * @version 1.0
 **/
public class HdfsFileSystem {

    /**
     * HDFS NameNode address (cluster entry point)
     */
    private static final String hdfsPath = "hdfs://10.10.10.10:8020";

    public static void copyFileToHDFSByName(Configuration conf,String localFileName, String remoteFileName) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        fs.copyFromLocalFile(new Path(localFileName), new Path(remoteFileName));
        System.out.println("copy from local file:" + localFileName + " to HDFS file:" + remoteFileName + " done.");
        fs.close();
    }

    public static void copyFileToHDFSByFileObj(byte[] bytes, String prefix, String fileName) throws IOException {
        InputStream in = null;
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            FSDataOutputStream out = fileSystem.create(new Path(buildPath(hdfsPath, "/document", prefix, fileName)));
            in = new ByteArrayInputStream(bytes);
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
            out.close();
        } finally {
            IOUtils.closeStream(in);
        }
        return;
    }

    public static void copyFileToHDFSByFileObj(String filename,String hdfsUri) throws IOException {
        if (null == hdfsUri || hdfsUri.isEmpty()) {
            System.err.println("copyFileToHDFSByFileObj: hdfsUri is required");
            return;
        }
        String localPath = HdfsFileSystem.class.getResource("").getPath();
        String localFile = localPath+filename;

        InputStream in = new FileInputStream(localFile);
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            FSDataOutputStream out = fileSystem.create(new Path(hdfsPath+hdfsUri+filename));
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
            out.close();
        } finally {
            IOUtils.closeStream(in);
        }
        return;
    }

    public static void copyFileToHDFSByFileObj(InputStream in, String prefix, String fileName) throws IOException {
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            FSDataOutputStream out = fileSystem.create(new Path(buildPath(hdfsPath, "/document", prefix, fileName)));
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
            out.close();
        } finally {
            IOUtils.closeStream(in);
        }
        return;
    }
    public static void copyFileToHDFSByFileObj(File localPath) throws IOException {
        InputStream in = null;
        if (null == localPath) {
            System.out.println("copyFileToHDFSByFileObj: localPath is required");
            return;
        }
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            //upload to the cluster root, keeping the local file's name as the target file name
            FSDataOutputStream out = fileSystem.create(new Path(buildPath(hdfsPath, localPath.getName())));

            in = new BufferedInputStream(new FileInputStream(localPath));
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
            out.close();
        } finally {
            IOUtils.closeStream(in);
        }
    }

    /*
     * Download hdfs file in URI to local file
     */
    public static void downloadFromHDFS(Configuration conf, String uri, String remoteFileName, String localFileName) throws IOException {
        Path path = new Path(remoteFileName);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        fs.copyToLocalFile(path, new Path(localFileName));
        fs.close();
        System.out.println("downloading file from " + remoteFileName + " to " + localFileName + " succeed");
        return;
    }


    /*
     * Download hdfs file in URI to local file
     */
    public static void downloadFromHDFS(String uri, String HDFSFileName, OutputStream localFileOutPut) throws IOException {
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), config);
        InputStream is = fs.open(new Path(uri + "/" + HDFSFileName));
        IOUtils.copyBytes(is, localFileOutPut, 4096, true);//close in and out stream via this API itself.
        System.out.println("downloading HDFS file " + HDFSFileName + " succeed");
        fs.close();
        return;
    }

    public static InputStream downloadFromHDFS(String uri, String HDFSFileName) throws IOException {
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), config);
        InputStream is = fs.open(new Path(uri + HDFSFileName));
        if (is == null) {
            System.out.println("hdfs inputStream is null");
        }
        return is;
    }

    /*
     * check whether the HDFS file exists in given URI
     */
    public static boolean exists(String HDFSUri, String HDFSFileName) {
        Configuration conf = new Configuration();
        boolean fileExists = false;
        try {
            FileSystem fileSystem = FileSystem.get(URI.create(HDFSUri), conf);
            fileExists = fileSystem.exists(new Path(HDFSUri + "/" + HDFSFileName));
        } catch (IOException e) {
            System.out.println("hdfs:exist() exception occurs. exception:" + e.getMessage());
            return fileExists;
        }

        System.out.println("HDFS URI:" + HDFSUri + ", fileName:" + HDFSFileName + " exists ? " + fileExists);
        return fileExists;
    }

    /**
     * List the files under a directory
     *
     * @param uri
     * @param folder
     * @throws IOException
     */
    public static void ls(String uri, String folder) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(folder);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FileStatus[] list = fs.listStatus(path);
        System.out.println("ls: " + folder);
        System.out.println("==========================================================");
        for (FileStatus f : list) {
            System.out.printf("name: %s, folder: %s, size: %d\n", f.getPath(), f.isDirectory(), f.getLen());
        }
        System.out.println("==========================================================");
        fs.close();
    }

    /**
     * Delete a file or directory
     * @param uri
     * @param filePath
     * @throws IOException
     */
    public static void delete(String uri, String filePath) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(filePath);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        //deleteOnExit() takes effect when the FileSystem is closed below
        fs.deleteOnExit(path);
        System.out.println("Delete: " + filePath);
        fs.close();
    }


    public static String getCurrentDatePath(){
        return new SimpleDateFormat("yyyy/MM/dd/").format(new Date());
    }

    public static String getCurrentDateTime(){
        return new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
    }

    public static String buildPath(String... paths) {
        StringBuilder buffer = new StringBuilder(paths.length > 0 ? paths[0] : "");
        if (paths.length >= 2) {
            for (int i = 1; i < paths.length; i++) {
                if (paths[i] == null || paths[i].length() == 0) {
                    continue;
                }
                //join segments with exactly one "/" between them
                if (paths[i - 1].endsWith("/")) {
                    if (paths[i].startsWith("/")) {
                        buffer.append(paths[i].substring(1));
                    } else {
                        buffer.append(paths[i]);
                    }
                } else {
                    if (paths[i].startsWith("/")) {
                        buffer.append(paths[i]);
                    } else {
                        buffer.append("/").append(paths[i]);
                    }
                }
            }
        }
        return buffer.toString();
    }


    public static String writeFile(String data) throws Exception {
        //note: getResource("") resolves to the classpath location of this class;
        //it must be a writable directory in the runtime environment
        String localPath = HdfsFileSystem.class.getResource("").getPath();
        String localFilename = getCurrentDateTime() + ".txt";
        localPath += localFilename;
        File file = new File(localPath);

        //if the file doesn't exist, create it
        if (!file.exists()) {
            file.createNewFile();
        }

        //write to the full path (not just file.getName()), so the upload and delete methods can find it later
        FileWriter fileWritter = new FileWriter(file);
        BufferedWriter bufferWritter = new BufferedWriter(fileWritter);
        bufferWritter.write(data);
        bufferWritter.close();
        fileWritter.close();
        System.out.println(localPath + " [write] done");

        return localFilename;
    }

    public static void deleteFile(String filename) throws Exception{
        String localPath = HdfsFileSystem.class.getResource("").getPath();
        File file = new File(localPath+filename);
        if(file.isFile() && file.exists()){
            boolean delete = file.delete();
            System.out.println(file.getPath()+" [delete] "+delete);
        }
    }
}
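
Besides the upload path used by the main program, the check/list/download helpers can be exercised the same way. This is only a sketch; the NameNode address, directory, and file name below are placeholders and must match what actually exists on your cluster.

import java.io.FileOutputStream;

public class HdfsFileSystemDemo {

    public static void main(String[] args) throws Exception {
        String hdfsUri = "hdfs://10.10.10.10:8020";                          //placeholder NameNode address
        String resultFile = "data/result/2019/08/27/20190827141800000.txt";  //placeholder result file

        //list what the main program wrote for that day
        HdfsFileSystem.ls(hdfsUri, "/data/result/2019/08/27/");

        //check for the file and, if present, pull it back to local disk
        if (HdfsFileSystem.exists(hdfsUri, resultFile)) {
            HdfsFileSystem.downloadFromHDFS(hdfsUri, resultFile, new FileOutputStream("result.txt"));
        }
    }
}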

3. Main entry class

  • Arguments are passed directly on the command line when the jar is executed, e.g. java -jar xx.jar arg1 arg2 ...
  • When packaging, point mainClass in the pom file at this class

import org.apache.commons.collections.CollectionUtils;
import java.util.List;
import java.util.Map;

public class QueryDatasApplication {

    public static void main(String[] args) {
        //**********read the SQL passed on the command line**************
        //e.g. java -jar XXX.jar "sql"
        if (args.length < 1) {
            System.err.println("Usage: java -jar xxx.jar \"<hive sql>\"");
            return;
        }
        String sql = args[0];

        try {
            //query Hive
//            String sql = "select name,id_card from hbzfw.t_user";
            List<Map<String, Object>> result = HiveClientUtils.getResult(sql);
            if (CollectionUtils.isEmpty(result)) {
                System.out.println("--------------query hive returned no rows");
                return;
            }

            //write the result to HDFS under a date-based path, e.g. /data/result/2019/08/27/
            String resUri = "/data/result/" + HdfsFileSystem.getCurrentDatePath();

            String content = JacksonUtil.writeValueAsString(result);
            //generate a local temp file
            String filename = HdfsFileSystem.writeFile(content);
            //upload it to HDFS
            HdfsFileSystem.copyFileToHDFSByFileObj(filename, resUri);
            System.out.println("--------------send data to hdfs success");
            //delete the local temp file
            HdfsFileSystem.deleteFile(filename);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
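
The main class also calls JacksonUtil.writeValueAsString, a helper that is not listed in this post (it is part of the linked repository). A minimal sketch based on the jackson-databind dependency declared in the pom below could look like this; the actual implementation in the repository may differ.

import com.fasterxml.jackson.databind.ObjectMapper;

public class JacksonUtil {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    //serialize any object (here: the List<Map<String, Object>> query result) to a JSON string
    public static String writeValueAsString(Object value) throws Exception {
        return MAPPER.writeValueAsString(value);
    }
}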

4. Maven configuration for packaging with a local jar dependency

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.aceleeyy</groupId>
    <artifactId>acelee-hive-hdfs-main-jar</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.2</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.9.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>2.9.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>2.9.3</version>
        </dependency>
        <dependency>
            <groupId>com.cloudera</groupId>
            <artifactId>HiveJDBC4</artifactId>
            <version>1.4</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/src/lib/HiveJDBC4.jar</systemPath>
        </dependency>
    </dependencies>


    <build>
        <plugins>
            <!-- maven jar in main class and dependencies start -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>QueryDatasApplication</mainClass>
                        </manifest>
                    </archive>
                    <!--<descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>-->
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id> <!-- this is used for inheritance merges -->
                        <phase>package</phase> <!--  bind to the packaging phase  -->
                        <goals>
                            <goal>single</goal>
                        </goals>
                        <!-- additional configuration -->
                        <configuration>
                            <!-- path to the assembly.xml descriptor -->
                            <descriptors>
                                <descriptor>src/assembly/assembly.xml</descriptor>
                            </descriptors>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!--  maven jar in main class and dependencies end -->
        </plugins>
    </build>


</project>

assembly.xml

<assembly>
    <id>jar-with-dependencies</id>
    <formats>
        <format>jar</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <dependencySets>
        <!-- default dependency set -->
        <dependencySet>
            <outputDirectory>/</outputDirectory>
            <useProjectArtifact>true</useProjectArtifact>
            <unpack>true</unpack>
            <scope>runtime</scope>
        </dependencySet>
        <!-- additional dependency set for system-scope dependencies (the local HiveJDBC4 jar) -->
        <dependencySet>
            <outputDirectory>/</outputDirectory>
            <useProjectArtifact>true</useProjectArtifact>
            <unpack>true</unpack>
            <scope>system</scope>
        </dependencySet>
    </dependencySets>
</assembly>

 
