一、思路
1.将excel转成csv格式的文件
2.读取csv的数据到hive表中
3.清除留痕
二、Maven依赖
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.4.0.cloudera2</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.4.0.cloudera2</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>2.4.0.cloudera2</version>
<scope>compile</scope>
</dependency>
<!--file-->
<dependency>
<groupId>net.sf.json-lib</groupId>
<artifactId>json-lib</artifactId>
<version>2.4</version>
<classifier>jdk15</classifier>
</dependency>
<!--ExcelToHive-->
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.2</version>
</dependency>
三、代码
package cn.com.businessmatrix;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class Excel2Hive {
private static SparkConf conf = new SparkConf();
private static SparkSession hc = SparkSession.builder()
.config(conf)
.enableHiveSupport()
.appName("Excel2Hive")
.getOrCreate();
public static void main(String[] args) {
String pi_schema = args[0];
String pi_table = args[1];
String excelPath = args[2];
String csvPath = args[3];
String pi_isDel = "1";
if (args.length > 4)
pi_isDel = args[4];
excelToCsv(excelPath,csvPath);
csvToHive(csvPath,pi_schema,pi_table,hc);
if ("1".equals(pi_isDel))
delCsvFile(csvPath);
}
public static void excelToCsv(String oldFilePath,String newFilePath){
String buffer = "";
Workbook wb =null;
Sheet sheet = null;
org.apache.poi.ss.usermodel.Row row = null;
List<Map<String,String>> list = null;
String cellData = null;
String filePath =oldFilePath ;
wb = readExcel(filePath);
if(wb != null){
list = new ArrayList<Map<String,String>>();
sheet = wb.getSheetAt(0);
int rownum = sheet.getPhysicalNumberOfRows();
row = sheet.getRow(0);
int colnum = row.getPhysicalNumberOfCells();
for (int i = 0; i<rownum; i++) {
row = sheet.getRow(i);
for (int j = 0; j < colnum; j++) {
cellData = (String) getCellFormatValue(row.getCell(j));
buffer +=cellData;
}
buffer = buffer.substring(0, buffer.lastIndexOf(",")).toString();
buffer += "\n";
}
System.out.println(buffer.split(","));
String savePath = newFilePath;
File saveCSV = new File(savePath);
try {
if(!saveCSV.exists())
saveCSV.createNewFile();
BufferedWriter writer = new BufferedWriter(new FileWriter(saveCSV));
writer.write(buffer);
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static Workbook readExcel(String filePath){
Workbook wb = null;
if(filePath==null){
return null;
}
String extString = filePath.substring(filePath.lastIndexOf("."));
InputStream is = null;
try {
is = new FileInputStream(filePath);
if(".xls".equals(extString)){
return wb = new HSSFWorkbook(is);
}else if(".xlsx".equals(extString)){
return wb = new XSSFWorkbook(is);
}else{
return wb = null;
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return wb;
}
public static Object getCellFormatValue(Cell cell){
Object cellValue = null;
if(cell!=null){
switch(cell.getCellType()){
case Cell.CELL_TYPE_NUMERIC:{
cellValue = String.valueOf(cell.getNumericCellValue()).replaceAll("\n", " ") + ",";
break;
}
case Cell.CELL_TYPE_FORMULA:{
if(DateUtil.isCellDateFormatted(cell)){
cellValue = String.valueOf(cell.getDateCellValue()).replaceAll("\n", " ") + ",";
}else{
cellValue = String.valueOf(cell.getNumericCellValue()).replaceAll("\n", " ") + ",";
}
break;
}
case Cell.CELL_TYPE_STRING:{
cellValue = cell.getRichStringCellValue().getString().replaceAll("\n", " ") + ",";
break;
}
default:
cellValue = "";
}
}else{
cellValue = "";
}
return cellValue;
}
private static void csvToHive(String csvPath,String pi_schema,String pi_table,SparkSession hc){
StructType lookdf_st = null;
List<StructField> lookdf_st_list = new ArrayList<StructField>();
lookdf_st_list.add(DataTypes.createStructField("KEY", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("area_rule_id", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_product_type", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("sk_product", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("dk_cust_type", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_invpty_type", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("sk_invpty", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("sk_account_type", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("sk_account", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("sk_agency", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("dk_agency_type", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_region", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("dk_org_tree", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_org", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("dk_org_tree_of_bl", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_org_of_bl", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("dk_custmngr_type", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_invpty_of_custmngr", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("priority", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("ratio", DataTypes.createDecimalType(5,2), true));
lookdf_st_list.add(DataTypes.createStructField("effective_from", DataTypes.createDecimalType(8,0), true));
lookdf_st_list.add(DataTypes.createStructField("effective_to", DataTypes.createDecimalType(8,0), true));
lookdf_st_list.add(DataTypes.createStructField("lval_ag", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("rval_ag", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("lval_rg", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("rval_rg", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("lval_ar", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("rval_ar", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("lval_bl", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("rval_bl", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("dk_tano", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("cserialno", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_tradeacco_reg", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("agencyno", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("netno", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("dk_share_type", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("dk_arsplt_chg_flag", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("dk_anal_dymic", DataTypes.StringType, true));
lookdf_st_list.add(DataTypes.createStructField("sk_agency_of_lv1", DataTypes.createDecimalType(12,0), true));
lookdf_st_list.add(DataTypes.createStructField("sk_region_of_lv1", DataTypes.createDecimalType(12,0), true));
lookdf_st = DataTypes.createStructType(lookdf_st_list);
Dataset<Row> lookdf = hc.read()
.schema(lookdf_st)
.option("header","true")
.csv(csvPath);
hc.createDataFrame(lookdf.toJavaRDD(), lookdf_st).write()
.mode(SaveMode.Overwrite)
.saveAsTable(pi_schema + "." + pi_table);
hc.close();
}
private static void delCsvFile(String csvPath) {
File csvFile = new File(csvPath);
if (csvFile.isFile() && csvFile.exists()) {
csvFile.delete();
System.out.println("del file is ok !!!");
} else {
System.out.println(csvFile + " is not exists !!!");
}
}
}