使用Java代码实现Excel2Hive

14 篇文章 0 订阅
2 篇文章 1 订阅

一、思路

1.将excel转成csv格式的文件
2.读取csv的数据到hive表中
3.清除留痕

二、Maven依赖

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.4.0.cloudera2</version>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.4.0.cloudera2</version>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.11</artifactId>
    <version>2.4.0.cloudera2</version>
    <scope>compile</scope>
</dependency>
<!--file-->
<dependency>
    <groupId>net.sf.json-lib</groupId>
    <artifactId>json-lib</artifactId>
    <version>2.4</version>
    <classifier>jdk15</classifier>
</dependency>

<!--ExcelToHive-->
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>5.2.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.2.2</version>
</dependency>

三、代码

package cn.com.businessmatrix;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;


/**
 * Loads an Excel file into a Hive table in three steps:
 * 1. convert the first sheet of the Excel file to a CSV file;
 * 2. read the CSV with Spark and overwrite the target Hive table;
 * 3. optionally delete the intermediate CSV file.
 */
public class Excel2Hive {
    // Shared Spark session with Hive support, created once for the whole run.
    private static SparkConf conf = new SparkConf();
    private static SparkSession hc = SparkSession.builder()
            .config(conf)
            .enableHiveSupport()
            .appName("Excel2Hive")
            .getOrCreate();

    /**
     * Entry point.
     * args[0] = Hive schema, args[1] = Hive table, args[2] = source Excel path,
     * args[3] = intermediate CSV path, args[4] (optional) = "1" to delete the CSV
     * afterwards (default "1", i.e. delete).
     */
    public static void main(String[] args) {
        // Guard against missing arguments instead of failing with
        // ArrayIndexOutOfBoundsException on args[3].
        if (args.length < 4) {
            System.err.println("Usage: Excel2Hive <schema> <table> <excelPath> <csvPath> [isDel]");
            System.exit(1);
        }
        String pi_schema = args[0];
        String pi_table = args[1];
        String excelPath = args[2];
        String csvPath = args[3];
        String pi_isDel = "1";
        if (args.length > 4)
            pi_isDel = args[4];

        // 1. Convert the Excel file to a CSV file.
        excelToCsv(excelPath, csvPath);
        // 2. Load the CSV into the Hive table.
        csvToHive(csvPath, pi_schema, pi_table, hc);
        // 3. Remove the intermediate CSV (default behaviour).
        if ("1".equals(pi_isDel))
            delCsvFile(csvPath);
    }

    /**
     * Converts the first sheet of an Excel workbook (.xls/.xlsx) to a CSV file.
     * Column count is taken from the first (header) row; newlines inside cells
     * are flattened to spaces by {@link #getCellFormatValue(Cell)}.
     *
     * @param oldFilePath source Excel path
     * @param newFilePath destination CSV path
     */
    public static void excelToCsv(String oldFilePath, String newFilePath) {
        Workbook wb = readExcel(oldFilePath);
        if (wb == null) {
            return;
        }
        // StringBuilder instead of String += in the loop (avoids O(n^2) copying).
        StringBuilder buffer = new StringBuilder();
        try {
            Sheet sheet = wb.getSheetAt(0);
            int rownum = sheet.getPhysicalNumberOfRows();
            org.apache.poi.ss.usermodel.Row header = sheet.getRow(0);
            int colnum = (header == null) ? 0 : header.getPhysicalNumberOfCells();
            for (int i = 0; i < rownum; i++) {
                org.apache.poi.ss.usermodel.Row row = sheet.getRow(i);
                // Build each row independently so stripping the trailing comma can
                // never truncate a previous row (the old code searched the whole buffer).
                StringBuilder line = new StringBuilder();
                for (int j = 0; j < colnum; j++) {
                    Cell cell = (row == null) ? null : row.getCell(j);
                    line.append(getCellFormatValue(cell));
                }
                // Every cell fragment ends with ',' — drop only this row's last one.
                int lastComma = line.lastIndexOf(",");
                if (lastComma >= 0) {
                    line.setLength(lastComma);
                }
                buffer.append(line).append('\n');
            }
        } finally {
            // Release the workbook's underlying resources in all cases.
            try {
                wb.close();
            } catch (IOException ignored) {
                // best-effort close; nothing useful to do on failure
            }
        }
        // try-with-resources guarantees the writer is closed even on write errors.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(newFilePath))) {
            writer.write(buffer.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a workbook from disk, choosing the implementation by file extension.
     *
     * @param filePath path ending in .xls or .xlsx
     * @return the opened workbook, or {@code null} if the path is null, has no
     *         extension, has an unsupported extension, or cannot be read
     */
    public static Workbook readExcel(String filePath) {
        if (filePath == null) {
            return null;
        }
        int dot = filePath.lastIndexOf('.');
        // No extension at all: the old substring(-1) would have thrown.
        if (dot < 0) {
            return null;
        }
        String extString = filePath.substring(dot);
        // try-with-resources: the stream is closed on every path (old code leaked it).
        try (InputStream is = new FileInputStream(filePath)) {
            if (".xls".equals(extString)) {
                return new HSSFWorkbook(is);
            } else if (".xlsx".equals(extString)) {
                return new XSSFWorkbook(is);
            }
            return null;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Converts one cell to its CSV fragment: {@code "<value>,"} with embedded
     * newlines flattened to spaces. Null/blank/unsupported cells yield a bare
     * {@code ","} so that column alignment is preserved (the old code returned
     * {@code ""} and silently shifted every following column left).
     *
     * <p>Uses the POI 4+/5.x {@link CellType} enum; the former
     * {@code Cell.CELL_TYPE_*} int constants were removed and do not compile
     * against the declared poi 5.2.2 dependency.
     *
     * @param cell the cell to format, may be {@code null}
     * @return a String fragment ending in ","
     */
    public static Object getCellFormatValue(Cell cell) {
        if (cell == null) {
            return ",";
        }
        switch (cell.getCellType()) {
            case NUMERIC:
                return String.valueOf(cell.getNumericCellValue()).replaceAll("\n", " ") + ",";
            case FORMULA:
                // Date-formatted formula results are rendered as dates,
                // everything else as the cached numeric result.
                if (DateUtil.isCellDateFormatted(cell)) {
                    return String.valueOf(cell.getDateCellValue()).replaceAll("\n", " ") + ",";
                }
                return String.valueOf(cell.getNumericCellValue()).replaceAll("\n", " ") + ",";
            case STRING:
                return cell.getRichStringCellValue().getString().replaceAll("\n", " ") + ",";
            default:
                return ",";
        }
    }

    /**
     * Reads the CSV with an explicit schema and overwrites the target Hive table.
     * Closes the shared SparkSession when done.
     *
     * @param csvPath   CSV produced by {@link #excelToCsv(String, String)}
     * @param pi_schema target Hive schema
     * @param pi_table  target Hive table
     * @param hc        Spark session with Hive support
     */
    private static void csvToHive(String csvPath, String pi_schema, String pi_table, SparkSession hc) {
        // Hard-coded target schema -- TODO: derive from the Hive table metadata.
        List<StructField> lookdf_st_list = new ArrayList<StructField>();

        lookdf_st_list.add(DataTypes.createStructField("KEY", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("area_rule_id", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_product_type", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("sk_product", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("dk_cust_type", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_invpty_type", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("sk_invpty", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("sk_account_type", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("sk_account", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("sk_agency", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("dk_agency_type", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_region", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("dk_org_tree", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_org", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("dk_org_tree_of_bl", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_org_of_bl", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("dk_custmngr_type", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_invpty_of_custmngr", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("priority", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("ratio", DataTypes.createDecimalType(5, 2), true));
        lookdf_st_list.add(DataTypes.createStructField("effective_from", DataTypes.createDecimalType(8, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("effective_to", DataTypes.createDecimalType(8, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("lval_ag", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("rval_ag", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("lval_rg", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("rval_rg", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("lval_ar", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("rval_ar", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("lval_bl", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("rval_bl", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("dk_tano", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("cserialno", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_tradeacco_reg", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("agencyno", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("netno", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("dk_share_type", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("dk_arsplt_chg_flag", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("dk_anal_dymic", DataTypes.StringType, true));
        lookdf_st_list.add(DataTypes.createStructField("sk_agency_of_lv1", DataTypes.createDecimalType(12, 0), true));
        lookdf_st_list.add(DataTypes.createStructField("sk_region_of_lv1", DataTypes.createDecimalType(12, 0), true));

        StructType lookdf_st = DataTypes.createStructType(lookdf_st_list);

        // The schema is applied at read time, so the DataFrame can be written
        // directly -- no need to round-trip through createDataFrame(toJavaRDD()).
        Dataset<Row> lookdf = hc.read()
                .schema(lookdf_st)
                .option("header", "true")
                .csv(csvPath);

        lookdf.write()
                .mode(SaveMode.Overwrite)
                .saveAsTable(pi_schema + "." + pi_table);
        // Release the Spark session; nothing after this step uses it.
        hc.close();
    }

    /**
     * Deletes the intermediate CSV file and reports the outcome, including
     * the case where the OS refuses the deletion (the old code ignored the
     * return value of {@link File#delete()}).
     */
    private static void delCsvFile(String csvPath) {
        File csvFile = new File(csvPath);
        if (csvFile.isFile() && csvFile.exists()) {
            if (csvFile.delete()) {
                System.out.println("del file is ok !!!");
            } else {
                System.out.println("failed to delete " + csvFile + " !!!");
            }
        } else {
            System.out.println(csvFile + " is not exists !!!");
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

脸ル粉嘟嘟

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值