Background
The big-data platform is configured with an executable jar; the requirement is that the jar can read Hive table data and upload that data to HDFS.
Components
JDK 8 + Hive + HDFS
Source code
https://gitee.com/acelee723/acelee-hive-hdfs-main-jar
Code
1. Hive client class
import org.mortbay.util.ajax.JSON;

import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Hive client class
 *
 * @author Ace Lee
 * @date 2019/8/27 14:18
 * @version 1.0
 **/
public class HiveClientUtils {

    private static String driverName = "com.cloudera.hive.jdbc4.HS2Driver";
    // Hive server IP -- the same address configured earlier in the configuration file
    private static String Url = "jdbc:hive2://10.10.10.10:10000/hbzfw";

    private static Connection conn;
    private static PreparedStatement ps;
    private static ResultSet rs;

    // create the connection
    public static Connection getConnection() {
        try {
            Class.forName(driverName);
            // the username here must be a user with permission to operate on HDFS,
            // otherwise the program fails with a "permission denied" exception
            conn = DriverManager.getConnection(Url, "", "");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    public static PreparedStatement prepare(Connection conn, String sql) {
        PreparedStatement ps = null;
        try {
            ps = conn.prepareStatement(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return ps;
    }

    public static List<Map<String, Object>> getResult(String sql) {
        List<Map<String, Object>> rowDatas = new ArrayList<Map<String, Object>>();
        System.out.println(sql);
        conn = getConnection();
        try {
            ps = prepare(conn, sql);
            rs = ps.executeQuery();
            ResultSetMetaData md = rs.getMetaData();
            int columnCount = md.getColumnCount();
            while (rs.next()) {
                Map<String, Object> rowData = new HashMap<String, Object>();
                for (int i = 1; i <= columnCount; i++) {
                    rowData.put(md.getColumnName(i), rs.getObject(i));
                }
                rowDatas.add(rowData);
            }
            System.out.println(JSON.toString(rowDatas));
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // release JDBC resources
            try { if (rs != null) rs.close(); } catch (SQLException ignored) {}
            try { if (ps != null) ps.close(); } catch (SQLException ignored) {}
            try { if (conn != null) conn.close(); } catch (SQLException ignored) {}
        }
        return rowDatas;
    }
}
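Note: the class above uses Cloudera's proprietary HiveJDBC4 driver, which is pulled in as a local system-scope jar in the pom below. If that jar is not available, the Apache Hive JDBC driver is a commonly used alternative; a minimal sketch of the two fields that would change (assuming the org.apache.hive:hive-jdbc artifact is added to the pom instead):

// Sketch only: swapping in the Apache Hive JDBC driver for Cloudera's HiveJDBC4.
// Assumes org.apache.hive:hive-jdbc is on the classpath (not shown in the pom below).
private static String driverName = "org.apache.hive.jdbc.HiveDriver";
private static String Url = "jdbc:hive2://10.10.10.10:10000/hbzfw"; // same hive2 URL scheme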
2. HDFS client class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.*;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * HDFS client class
 *
 * @author Ace Lee
 * @date 2019/8/27 14:18
 * @version 1.0
 **/
public class HdfsFileSystem {

    /**
     * HDFS cluster URI
     */
    private static final String hdfsPath = "hdfs://10.10.10.10:8020";

    public static void copyFileToHDFSByName(Configuration conf, String localFileName, String remoteFileName) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        fs.copyFromLocalFile(new Path(localFileName), new Path(remoteFileName));
        System.out.println("copy from local file:" + localFileName + " to HDFS file:" + remoteFileName + " done.");
        fs.close();
    }
    public static void copyFileToHDFSByFileObj(byte[] bytes, String prefix, String fileName) throws IOException {
        InputStream in = null;
        FSDataOutputStream out = null;
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            out = fileSystem.create(new Path(buildPath(hdfsPath, "/document", prefix, fileName)));
            in = new ByteArrayInputStream(bytes);
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
        } finally {
            // close both streams even if copyBytes fails
            IOUtils.closeStream(out);
            IOUtils.closeStream(in);
        }
    }
    public static void copyFileToHDFSByFileObj(String filename, String hdfsUri) throws IOException {
        if (null == hdfsUri || hdfsUri.isEmpty()) {
            System.err.println("copyFileToHDFSByFile: hdfsUri is required");
            return;
        }
        // the local file is read from the classpath directory next to this class
        String localPath = HdfsFileSystem.class.getResource("").getPath();
        String localFile = localPath + filename;
        InputStream in = new FileInputStream(localFile);
        FSDataOutputStream out = null;
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            out = fileSystem.create(new Path(hdfsPath + hdfsUri + filename));
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
        } finally {
            IOUtils.closeStream(out);
            IOUtils.closeStream(in);
        }
    }
    public static void copyFileToHDFSByFileObj(InputStream in, String prefix, String fileName) throws IOException {
        FSDataOutputStream out = null;
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            out = fileSystem.create(new Path(buildPath(hdfsPath, "/document", prefix, fileName)));
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
        } finally {
            IOUtils.closeStream(out);
            IOUtils.closeStream(in);
        }
    }
    public static void copyFileToHDFSByFileObj(File localPath) throws IOException {
        InputStream in = null;
        FSDataOutputStream out = null;
        if (null == localPath) {
            System.out.println("copyFileToHDFSByFile: localPath is required");
            return;
        }
        try {
            Configuration conf = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsPath), conf);
            // write under /document using the local file's name; the bare cluster URI
            // alone is not a valid target file path
            out = fileSystem.create(new Path(buildPath(hdfsPath, "/document", localPath.getName())));
            in = new BufferedInputStream(new FileInputStream(localPath));
            IOUtils.copyBytes(in, out, 4096, false);
            out.hsync();
        } finally {
            IOUtils.closeStream(out);
            IOUtils.closeStream(in);
        }
    }
    /*
     * Download the HDFS file at the given URI to a local file
     */
    public static void downloadFromHDFS(Configuration conf, String uri, String remoteFileName, String localFileName) throws IOException {
        Path path = new Path(remoteFileName);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        fs.copyToLocalFile(path, new Path(localFileName));
        fs.close();
        System.out.println("downloading file from " + remoteFileName + " to " + localFileName + " succeeded");
    }

    /*
     * Download the HDFS file at the given URI to a local output stream
     */
    public static void downloadFromHDFS(String uri, String HDFSFileName, OutputStream localFileOutPut) throws IOException {
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), config);
        InputStream is = fs.open(new Path(uri + "/" + HDFSFileName));
        IOUtils.copyBytes(is, localFileOutPut, 4096, true); // closes both streams itself
        System.out.println("downloading HDFS file " + HDFSFileName + " succeeded");
        fs.close();
    }

    public static InputStream downloadFromHDFS(String uri, String HDFSFileName) throws IOException {
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), config);
        InputStream is = fs.open(new Path(uri + HDFSFileName));
        if (is == null) {
            System.out.println("hdfs inputStream is null");
        }
        return is;
    }
    /*
     * Check whether the HDFS file exists under the given URI
     */
    public static boolean exists(String HDFSUri, String HDFSFileName) {
        Configuration conf = new Configuration();
        boolean fileExists = false;
        try {
            FileSystem fileSystem = FileSystem.get(URI.create(HDFSUri), conf);
            fileExists = fileSystem.exists(new Path(HDFSUri + "/" + HDFSFileName));
        } catch (IOException e) {
            System.out.println("hdfs:exists() exception occurs. exception:" + e.getMessage());
            return fileExists;
        }
        System.out.println("HDFS URI:" + HDFSUri + ", fileName:" + HDFSFileName + " exists ? " + fileExists);
        return fileExists;
    }

    /**
     * List the files under a directory
     *
     * @param uri
     * @param folder
     * @throws IOException
     */
    public static void ls(String uri, String folder) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(folder);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FileStatus[] list = fs.listStatus(path);
        System.out.println("ls: " + folder);
        System.out.println("==========================================================");
        for (FileStatus f : list) {
            System.out.printf("name: %s, folder: %s, size: %d\n", f.getPath(), f.isDirectory(), f.getLen());
        }
        System.out.println("==========================================================");
        fs.close();
    }

    /**
     * Delete a file or directory
     *
     * @param uri
     * @param filePath
     * @throws IOException
     */
    public static void delete(String uri, String filePath) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(filePath);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        // delete recursively and immediately; deleteOnExit would only remove the
        // path once the FileSystem is closed
        fs.delete(path, true);
        System.out.println("Delete: " + filePath);
        fs.close();
    }
    public static String getCurrentDatePath() {
        return new SimpleDateFormat("yyyy/MM/dd/").format(new Date());
    }

    public static String getCurrentDateTime() {
        return new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
    }

    /**
     * Join path segments, inserting exactly one "/" between adjacent segments
     */
    public static String buildPath(String... paths) {
        if (paths == null || paths.length == 0) {
            return "";
        }
        StringBuilder buffer = new StringBuilder(paths[0]);
        for (int i = 1; i < paths.length; i++) {
            if (paths[i] == null || paths[i].length() == 0) {
                continue;
            }
            if (paths[i - 1].endsWith("/")) {
                if (paths[i].startsWith("/")) {
                    buffer.append(paths[i].substring(1));
                } else {
                    buffer.append(paths[i]);
                }
            } else {
                if (paths[i].startsWith("/")) {
                    buffer.append(paths[i]);
                } else {
                    buffer.append("/").append(paths[i]);
                }
            }
        }
        return buffer.toString();
    }
    public static String writeFile(String data) throws Exception {
        String localPath = HdfsFileSystem.class.getResource("").getPath();
        String localFilename = getCurrentDateTime() + ".txt";
        localPath += localFilename;
        File file = new File(localPath);
        // if the file doesn't exist, create it
        if (!file.exists()) {
            file.createNewFile();
        }
        // write to the full path, not just the file name (which would resolve
        // against the current working directory)
        FileWriter fileWriter = new FileWriter(file);
        BufferedWriter bufferWriter = new BufferedWriter(fileWriter);
        bufferWriter.write(data);
        bufferWriter.close();
        fileWriter.close();
        System.out.println(localPath + " [write] done");
        return localFilename;
    }

    public static void deleteFile(String filename) throws Exception {
        String localPath = HdfsFileSystem.class.getResource("").getPath();
        File file = new File(localPath + filename);
        if (file.isFile() && file.exists()) {
            boolean delete = file.delete();
            System.out.println(file.getPath() + " [delete] " + delete);
        }
    }
}
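The /data/result/yyyy/MM/dd/ upload path used by the main class below is assembled from the date and path helpers above. A quick illustrative check of how buildPath joins segments (the demo class name is hypothetical, and the printed timestamp is just an example):

// Illustrative only: shows how buildPath inserts exactly one "/" between segments.
public class PathHelperDemo {
    public static void main(String[] args) {
        // prints something like:
        // hdfs://10.10.10.10:8020/document/2019/08/27/20190827141800123.txt
        System.out.println(HdfsFileSystem.buildPath(
                "hdfs://10.10.10.10:8020", "/document",
                HdfsFileSystem.getCurrentDatePath(),
                HdfsFileSystem.getCurrentDateTime() + ".txt"));
    }
}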
3. Main entry class
- Arguments are passed directly after the jar command, e.g. java -jar xx.jar arg1 arg2 ...
- When packaging, set mainClass in the pom file to this class
import org.apache.commons.collections.CollectionUtils;

import java.util.List;
import java.util.Map;

public class QueryDatasApplication {

    public static void main(String[] args) {
        //********** read the SQL argument from the jar command line **********
        // e.g. java -jar XXX.jar "sql"
        if (args.length == 0) {
            System.err.println("usage: java -jar xxx.jar \"<sql>\"");
            return;
        }
        String sql = args[0];
        try {
            // query hive
            // String sql = "select name,id_card from hbzfw.t_user";
            List<Map<String, Object>> result = HiveClientUtils.getResult(sql);
            if (CollectionUtils.isEmpty(result)) {
                System.out.println("--------------query hive null");
                return;
            }
            // write the result to hdfs
            // e.g. /data/result/2019/08/27/
            String resUri = "/data/result/" + HdfsFileSystem.getCurrentDatePath();
            String content = JacksonUtil.writeValueAsString(result);
            // generate the local file
            String filename = HdfsFileSystem.writeFile(content);
            // upload to hdfs
            HdfsFileSystem.copyFileToHDFSByFileObj(filename, resUri);
            System.out.println("--------------send data to hdfs success");
            // delete the local file
            HdfsFileSystem.deleteFile(filename);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
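The main class also calls JacksonUtil.writeValueAsString, a helper that is not listed in this post (the full version is in the Gitee repo above). A minimal sketch consistent with the jackson-databind dependency declared in the pom below:

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Minimal JSON helper sketch; the actual JacksonUtil may differ.
 */
public class JacksonUtil {
    private static final ObjectMapper MAPPER = new ObjectMapper();

    public static String writeValueAsString(Object value) {
        try {
            return MAPPER.writeValueAsString(value);
        } catch (JsonProcessingException e) {
            throw new RuntimeException("failed to serialize to JSON", e);
        }
    }
}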
4. Maven configuration for packaging with a local jar dependency
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.aceleeyy</groupId>
    <artifactId>acelee-hive-hdfs-main-jar</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.2</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.9.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>2.9.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>2.9.3</version>
        </dependency>
        <dependency>
            <groupId>com.cloudera</groupId>
            <artifactId>HiveJDBC4</artifactId>
            <version>1.4</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/src/lib/HiveJDBC4.jar</systemPath>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- maven jar with main class and dependencies: start -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>QueryDatasApplication</mainClass>
                        </manifest>
                    </archive>
                    <!--<descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>-->
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id> <!-- this is used for inheritance merges -->
                        <phase>package</phase> <!-- bind to the packaging phase -->
                        <goals>
                            <goal>single</goal>
                        </goals>
                        <!-- additional configuration -->
                        <configuration>
                            <!-- path to the assembly.xml file -->
                            <descriptors>
                                <descriptor>src/assembly/assembly.xml</descriptor>
                            </descriptors>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- maven jar with main class and dependencies: end -->
        </plugins>
    </build>
</project>
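Note that the built-in jar-with-dependencies descriptor (commented out above) only bundles runtime-scope dependencies, so the system-scope HiveJDBC4 jar would be left out of the fat jar. That is why the build points at a custom assembly.xml, shown next, which adds a second dependency set for system scope.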
assembly.xml
<assembly>
    <id>jar-with-dependencies</id>
    <formats>
        <format>jar</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <dependencySets>
        <!-- default configuration: runtime-scope dependencies -->
        <dependencySet>
            <outputDirectory>/</outputDirectory>
            <useProjectArtifact>true</useProjectArtifact>
            <unpack>true</unpack>
            <scope>runtime</scope>
        </dependencySet>
        <!-- additionally include system-scope dependencies (the local HiveJDBC4 jar) -->
        <dependencySet>
            <outputDirectory>/</outputDirectory>
            <useProjectArtifact>true</useProjectArtifact>
            <unpack>true</unpack>
            <scope>system</scope>
        </dependencySet>
    </dependencySets>
</assembly>
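With both files in place, mvn package should produce a fat jar named roughly acelee-hive-hdfs-main-jar-1.0-SNAPSHOT-jar-with-dependencies.jar under target/ (Maven appends the assembly id to the artifact name by default), which can then be run as described above, e.g. java -jar <the-jar> "select name,id_card from hbzfw.t_user".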