hive动态分区shell_一段读取 excel 自动生成 hive load shell 脚本的代码

该代码实现了一个Java程序,从Excel文件中读取数据并自动生成Hive动态分区加载的shell脚本。程序处理Excel文件,找出列信息,结合配置文件中的接口名和日期,替换模板文件中的占位符,最终生成用于加载数据到Hive的shell脚本。
摘要由CSDN通过智能技术生成

自动化是程序员的天性,这里是一段小代码自动化手工重复的工作。

package collect;

import com.google.common.base.Charsets;

import com.google.common.base.Joiner;

import com.google.common.io.Files;

import com.google.common.io.LineProcessor;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;

import org.apache.poi.ss.usermodel.Cell;

import org.apache.poi.ss.usermodel.Sheet;

import org.apache.poi.ss.usermodel.Workbook;

import org.apache.poi.ss.usermodel.WorkbookFactory;

import java.io.File;

import java.io.FilenameFilter;

import java.io.IOException;

import java.text.SimpleDateFormat;

import java.util.ArrayList;

import java.util.Date;

import java.util.List;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**

* All rights Reserved, Designed By Migu.cn

*

* @Description: 读取表 Excel 信息,自动生成动态分区脚本

* @Author: Yao

* @Date: 2018/5/8 9:27

* @Version: v0.1

*/

public class AutoGenSh {

public static final String template_file = "dynamicLoadTemplate.txt";

public static final String interface_name = "%interface_name%";

public static final String date = "%date%";

public static final String table_name = "%ods_table_name%";

public static final String file_name = "%file_name%";

public static final String tmpTableName = "%tmp_table_name%";

public static final String start_index = "%startIndex%";

public static final String column_list = "%column_list%";

static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");//可以方便地修改日期格式

public static void main(String[] args) {

// read config from configfile

String currentDir = System.getProperty("user.dir");

File[] confFiles = new File(currentDir).listFiles(new FilenameFilter() {

public boolean accept(File dir, String name) {

return name.endsWith(".conf");

}

});

for (File conf : confFiles) {

String confName = conf.getName();

// read config find interfaceName and fileName;

try {

List lines = Files.readLines(conf, Charsets.UTF_8);

final String interfaceName = lines.get(0).substring(lines.get(0).indexOf("=") + 1);

final String fileName = lines.get(1).substring(lines.get(1).indexOf("=") + 1);

final String tableName = confName.substring(0, confName.indexOf("."));

// find start index from filename

final int startDateIndex = findDateIndex(fileName);

// extract columns from excel

String excelPath = currentDir + File.separator + tableName + ".xlsx";

// don't miss last ,

File excelFile = new File(excelPath);

final String columns = extractColumns(excelFile) + ",";

// final text after replace

final List replaceResult = new ArrayList<>();

final String currentDate = dateFormat.format(new Date());

File templateFile = new File(currentDir + File.separator + template_file);

Files.readLines(templateFile, Charsets.UTF_8, new LineProcessor>() {

public boolean processLine(String s) throws IOException {

s = s.replace(interface_name, interfaceName)

.replace(date, currentDate)

.replace(table_name, tableName)

.replace(tmpTableName, tableName + "tmp")

.replace(column_list, columns)

.replace(file_name, fileName)

.replace(start_index, startDateIndex + "");

return replaceResult.add(s);

}

public List getResult() {

return replaceResult;

}

});

String shText = Joiner.on("\n").join(replaceResult);

// export sh file

// mkdir and move file

String newDir = currentDir + File.separator + tableName;

new File(newDir).mkdirs();

// move xlsx and conf

conf.renameTo(new File(newDir + "/" + conf.getName()));

File eFile = new File(excelPath);

eFile.renameTo(new File(newDir + "/" + eFile.getName()));

String destShFile = newDir + "/" + tableName + "_2ods.sh";

Files.write(shText.getBytes(), new File(destShFile));

System.out.println("success export sh file: " + destShFile);

} catch (IOException e) {

e.printStackTrace();

}

}

}

/**

* extract columns from excel

*

* @param templateFile

* @return

*/

private static String extractColumns(File templateFile) {

Workbook excel = null;

try {

List list = new ArrayList();

excel = WorkbookFactory.create(templateFile);

Sheet sheet = excel.getSheetAt(0);

for (int i = 0; i <= sheet.getLastRowNum(); i++) {

Cell cell = sheet.getRow(i).getCell(0);

String value = cell.getStringCellValue();

// skip filename filedname in_date

if (!value.toLowerCase().contains("filename")

&& !value.toLowerCase().contains("in_date")

&& !value.toLowerCase().contains("fieldname")) {

list.add(" " + value);

}

}

return Joiner.on(",\n").join(list);

} catch (IOException e) {

e.printStackTrace();

} catch (InvalidFormatException e) {

e.printStackTrace();

} finally {

if (excel != null) {

try {

excel.close();

} catch (IOException e) {

e.printStackTrace();

}

}

}

return null;

}

/**

* 从文件名中获取 substring start index

*

* @param fileName i-client-base_content_20180503_ssms_000000.txt

* @return

*/

private static int findDateIndex(String fileName) {

if (fileName.contains("YYYYMMDD")) {

return fileName.indexOf("YYYYMMDD") + 1;

}

String extractDateRegex = "(\\d{8})";

Matcher matcher = Pattern.compile(extractDateRegex).matcher(fileName);

if (matcher.find()) {

String date = matcher.group(1);

return fileName.indexOf(date) + 1;

}

return -1;

}

}

Shell 脚本模板定义如下

#!/bin/sh

#

#***************************************************************************************************

# ** 文件名称: %interface_name% 入 hive ods

# ** 功能描述: 从临时表加载 动态分区

# **

# ** 创建者: Chen Yao

# ** 创建日期: %date%

# ** 修改日志:

# ** 修改日期 修改人 修改内容

# ** -----------------------------------------------------------------------------------------------

# **

# ** China Mobile(Chengdu) Information Technology Co., Ltd.

# ** All Rights Reserved.

#***************************************************************************************************

export LANG=en_US.UTF-8

export HIVE_HOME=`echo $HIVE_HOME`

export PATH=$PATH:$HIVE_HOME/bin

#参数配置

#初始化表名

source_table_tmp=%tmp_table_name%

#结果表

target_table=%ods_table_name%

#echo "drop table if exists ;

# i-client-base_content_20180503_ssms_000000.txt

echo "

use mgwh_mgplusmigrate_ods;

set hive.exec.dynamic.partition=true;

set hive.exec.dynamic.partition.mode=nonstrict;

insert into ${target_table} partition(dayid)

select filename,

from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'),

%column_list%

substr(filename,%startIndex%,8) as dayid

from ${source_table_tmp};" | hive &&

exit 0

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值