Implementing data import with the GreenPlum COPY command, plus transaction management

1. Generate the CSV file

@Component
public class MainFileService {
    private static final Logger logger = LoggerFactory.getLogger(MainFileService.class);
    @Value("${csv.sep}")
    private char valueCsvSep;
    @Value("${csv.quote}")
    private char valueCsvQuote;
    @Value("${csv.null-string}")
    private char valueNullString;
    @Value("${csv.TemplatefileNamePrefix}")
    private String valueTemplatefileNamePrefix;
    @Value("${csv.TemplatefileNamesSuffix}")
    private String valueTemplatefileNamesSuffix;
    @Value("${csv.filePath}")
    private String valueCsvFilePath;

    private static char csvSep;
    private static char csvQuote;
    private static char nullString;
    private static String templatefileNamePrefix;
    private static String templatefileNameSuffix;
    private static String csvFilePath;

    @Autowired
    private QueryRunner mesGpQueryRunner;

    private static QueryRunner qr_gp;

    /**
     * Cache of table metadata.
     * A table structure change requires a process restart.
     * The HashMap javadoc notes: this implementation is not synchronized; if multiple threads
     * access a hash map concurrently and at least one of them modifies the map structurally,
     * it must be synchronized externally.
     * Most Hashtable methods are synchronized, while HashMap methods generally are not; under
     * concurrency Hashtable can be used directly, whereas HashMap needs external synchronization.
     * Hashtable pays for that safety with throughput, because every operation locks the whole
     * table. ConcurrentHashMap avoids locking the entire structure (lock striping in older JDKs,
     * CAS plus per-bin locking since Java 8), so it is much faster than Hashtable.
     */
    public static Map<String, TableEntity> tableMetaInfoMap = new ConcurrentHashMap<>();

    @PostConstruct
    public void init() {
        // Copy the injected config values into static fields so the static helpers below can use them
        csvSep = valueCsvSep;
        csvQuote = valueCsvQuote;
        nullString = valueNullString;
        templatefileNamePrefix = valueTemplatefileNamePrefix;
        templatefileNameSuffix = valueTemplatefileNamesSuffix;
        csvFilePath = valueCsvFilePath;
        qr_gp = mesGpQueryRunner;
    }


    /*
     * Load the table metadata into the map
     */
    public static void loadTableMetaEntity(String schema, String tableName) throws ETLException {
        long startTime = System.currentTimeMillis();
        String sql = "select " +
                " nt.nspname as tableSchema," +
                " c.relname as tableName," +
                " a.attname as columnName," +
                " a.attnum as ordinalPosition," +
                " format_type(a.atttypid, a.atttypmod) as dataType," +
                " c.relkind = 'r' AS isUpdatatable," +
                " a.atttypid in (23, 20) and a.atthasdef and(" +
                " select " +
                " position ( 'nextval(' in pg_catalog.pg_get_expr(adbin,adrelid) ) > 0 and" +
                " position ( '::regclass)' in pg_catalog.pg_get_expr(adbin,adrelid) ) > 0" +
//                 get attrelid for query primary key
                " attrelid" +
                " FROM pg_catalog.pg_attrdef d" +
                " WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef" +
                " )as hasSequence," +
                " attrelid" +
                " from pg_catalog.pg_class c join pg_catalog.pg_namespace nt on (c.relnamespace = nt.oid)" +
                " join pg_attribute a on (a.attrelid = c.oid)" +
                " where c.relname = '" + tableName + "' and nt.nspname = '" + schema + "'" +
                " and a.attnum > 0 and a.attisdropped = 'f'" +
                " order by a.attnum asc";//按照序号排列
        try {
            List<ColumnEntity> tableColumnList = qr_gp.query(sql, new BeanListHandler<ColumnEntity>(ColumnEntity.class));
            if (null == tableColumnList || tableColumnList.isEmpty())
                throw new ETLException(ErrorCode.ERRORGP_MES_TABLE_NO_EXIST);
            TableEntity tableEntity = new TableEntity();
            tableEntity.setSchemaName(tableColumnList.get(0).getTableSchema());
            tableEntity.setTableName(tableColumnList.get(0).getTableName());
            tableEntity.setColumnEntityList(tableColumnList);
            tableEntity.setCsvFilePath(csvFilePath);
            tableMetaInfoMap.put(schema + "." + tableName, tableEntity);
            logger.info("加载表结构 {} 完成! Cost {} ms;", schema+"."+tableName,System.currentTimeMillis() - startTime);
        } catch (Exception e) {
            throw new ETLException(ErrorCode.ERRORGP_MES_TABLEMETA_LOAD, e);
        }
    }


    
    /*
     * Create a CSV file on the server according to the configuration
     */
    public static String createCsvFile(String schema, String tableName, List<Map<String, Object>> tableDataList) throws Exception {
        String fullTableName = schema + "." + tableName;
        TableEntity tableEntity = tableMetaInfoMap.get(fullTableName);
        if (null == tableEntity) throw new ETLException(ErrorCode.ERRORGP_MES_TABLEMETA_LOAD);

        // File name: prefix + tableName + 20200101080000999 + .csv
        DateTimeFormatter fmt24 = DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSS");
        String fileNameSuffix = LocalDateTime.now().format(fmt24);
        String fileName = templatefileNamePrefix + tableName + fileNameSuffix + templatefileNameSuffix;


        File csvFile = null;
        BufferedWriter csvFileOutputStream = null;
        try {
            // Create the target directory if necessary (mkdirs also creates missing parents)
            File file = new File(tableEntity.getCsvFilePath());
            if (!file.exists()) file.mkdirs();

            // Create the CSV file; an existing file with the same name is overwritten
            csvFile = new File(tableEntity.getCsvFilePath(), fileName);

            // UTF-8 so the separator characters are read back correctly
            csvFileOutputStream = new BufferedWriter(
                    new OutputStreamWriter(
                            new FileOutputStream(csvFile), "UTF-8"), 1024);

            // Write a UTF-8 BOM so downstream tools detect the encoding
            csvFileOutputStream.write('\uFEFF');
            // Write the column names as the header row
            for (ColumnEntity column : tableEntity.getColumnEntityList()) {
                if (!column.getOrdinalPosition().equals(1)) csvFileOutputStream.write(csvSep);
                csvFileOutputStream.write(csvQuote);
                if (null == column.getColumnName()) throw new ETLException(ErrorCode.ERRORGP_MES_TABLEMETA_LOAD); // column name must not be null
                else csvFileOutputStream.write(column.getColumnName());
                csvFileOutputStream.write(csvQuote);
            }

            // Write the result-set rows to the CSV
            for (Map<String, Object> rowData : tableDataList) {
                csvFileOutputStream.newLine();
                for (ColumnEntity column : tableEntity.getColumnEntityList()) {
                    if (!column.getOrdinalPosition().equals(1)) csvFileOutputStream.write(csvSep);
                    if ("db_timestamp".equals(column.getColumnName())) {
                        // db_timestamp is filled with now() so the database records the load time
                        csvFileOutputStream.write(csvQuote + "now()" + csvQuote);
                    } else if (null == rowData.get(column.getColumnName())) {
                        csvFileOutputStream.write(nullString);
                    } else {
                        // Escape backslashes so COPY ... ESCAPE '\' restores the original value
                        csvFileOutputStream.write(csvQuote + rowData.get(column.getColumnName()).toString().replace("\\", "\\\\") + csvQuote);
                    }
                }
            }
        } catch (IOException e) {
            throw new ETLException(ErrorCode.ERRORCSV_TEMPLATE_CREATE, e);
        } finally {
            // Close (and flush) the writer; guard against a failed open
            if (csvFileOutputStream != null) csvFileOutputStream.close();
        }
        return tableEntity.getCsvFilePath() + "/" + fileName;
    }

}
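
For context, here is a minimal usage sketch of the two helpers above. The table ods.t_demo and its columns are hypothetical, and the Spring context is assumed to be up so that @PostConstruct has populated the static fields:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class CsvExportDemo {
    // Hypothetical caller; assumes MainFileService has been initialized by Spring
    public static String exportDemoBatch() throws Exception {
        List<Map<String, Object>> rows = new ArrayList<>();
        Map<String, Object> row = new HashMap<>();
        row.put("id", 1);          // hypothetical columns of ods.t_demo
        row.put("name", "demo");
        rows.add(row);

        // Cache the column metadata once; it is reused until the process restarts
        MainFileService.loadTableMetaEntity("ods", "t_demo");
        // Write the batch to a timestamped CSV and return its path
        return MainFileService.createCsvFile("ods", "t_demo", rows);
    }
}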

2. Import method

@Service
@Transactional(transactionManager = "mesGpPlatformTransactionManager",rollbackFor = Exception.class)
public class SaveMesDataDao {

    @Value("${csv.sep}")
    private char csvSep;
    @Value("${csv.quote}")
    private char csvQuote;
    @Value("${csv.null-string}")
    private char nullString;


    @Autowired
    private QueryRunner mesGpQueryRunner;

    private static String tarSchema="ods";



    public void saveMesData(EtlConfD etlConfD, String tableName, List<Map<String, Object>> tableDataList) throws Exception {
        // Generate the CSV file
        String filePath = MainFileService.createCsvFile(tarSchema, tableName, tableDataList);
        // Import it via COPY ... FROM STDIN
        executeCopyFile(etlConfD, tarSchema, tableName, filePath);
    }



    public void executeCopyFile(EtlConfD etlConfD, String schema, String tableName, String fileName) {
        String fullTableName = schema + "." + tableName;

        // delete the target window first
        StringBuilder deleteSql = new StringBuilder();
        switch (etlConfD.getQueryMinute()) {
            case 0:
                // TRUNCATE could also be rolled back, but it locks the whole table (blocking reads), so DELETE is used
                deleteSql.append("delete from ").append(fullTableName);
                break;
            default:
                deleteSql.append("delete from ").append(fullTableName)
                        .append(" where src_db_timestamp >= '").append(etlConfD.getRunStartTimestamp().toString().substring(0, 19)).append("'::timestamp")
                        .append(" and src_db_timestamp < '").append(etlConfD.getRunEndTimestamp().toString().substring(0, 19)).append("'::timestamp");
        }


        try {
            mesGpQueryRunner.update(deleteSql.toString());
        }catch (Exception e){
            throw new ETLException(ErrorCode.ERROR_MES_TRUNCATE,e);
        }

//        transaction rollback test
//        if (1 == 1) throw new ETLException(ErrorCode.ERROR_MES_TRUNCATE);

        // insert: build "schema.table(col1,col2,...)" for the COPY statement
        List<ColumnEntity> columnEntity = MainFileService.tableMetaInfoMap.get(fullTableName).getColumnEntityList();
        StringBuilder tableInfo = new StringBuilder().append(columnEntity.get(0).getTableSchema()).append(".").append(columnEntity.get(0).getTableName()).append("(");
        for (ColumnEntity e : columnEntity) {
            tableInfo.append(e.getColumnName()).append(",");
        }
        tableInfo.deleteCharAt(tableInfo.length() - 1).append(")");

        String sql="copy " + tableInfo.toString() +
                " from stdin" +
//                " from '"+fileName+"'" +
                " WITH" +
                " DELIMITER AS '"+csvSep+"'"+//分隔符号
                " NULL AS '"+nullString+"'"+//空值符号
                " CSV HEADER" +
                " QUOTE AS'" +csvQuote+"'"+ //引用符号
                " ESCAPE '\\' ;";
        try (Reader reader = Files.newBufferedReader(Paths.get(fileName));
             Connection conn = mesGpQueryRunner.getDataSource().getConnection()) {
            // Unwrap the (possibly pooled) connection to the pgjdbc connection and get its CopyManager
            CopyManager mgr = conn.unwrap(PGConnection.class).getCopyAPI();
            long rowCnt = mgr.copyIn(sql, reader);
        } catch (Exception e) {
            throw new ETLException(ErrorCode.ERRORCSV_COPYFILE_EXECUTE, e);
        }
    }
}
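
For reference, the pgjdbc copy API used above also works outside Spring. Below is a minimal standalone sketch; the connection URL, credentials, table, and file path are all placeholders:

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.DriverManager;

import org.postgresql.PGConnection;
import org.postgresql.copy.CopyManager;

public class CopyInDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder connection details; GreenPlum speaks the PostgreSQL wire protocol
        try (Connection conn = DriverManager.getConnection(
                "jdbc:postgresql://gp-master:5432/etl", "etl_user", "secret");
             Reader reader = Files.newBufferedReader(Paths.get("/opt/etl/filetest/MESt_demo20200101080000999.csv"))) {
            conn.setAutoCommit(false); // keep the COPY inside an explicit transaction
            CopyManager mgr = conn.unwrap(PGConnection.class).getCopyAPI();
            long rows = mgr.copyIn(
                    "copy ods.t_demo(id,name) from stdin WITH DELIMITER AS '\u001F' NULL AS '\u001D' CSV HEADER QUOTE AS '\u001E' ESCAPE '\\'",
                    reader);
            conn.commit(); // or conn.rollback() to discard the loaded rows
            System.out.println("loaded " + rows + " rows");
        }
    }
}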

3. Configuration file

csv:
  # field separator (ASCII unit separator, U+001F)
  sep: "\u001F"
  # quote character (ASCII record separator, U+001E)
  quote: "\u001E"
  # null marker (ASCII group separator, U+001D)
  null-string: "\u001D"
  TemplatefileNamePrefix: MES
  TemplatefileNamesSuffix: .csv
  filePath: '/opt/etl/filetest'

These non-printing control characters are used as separators because they virtually never appear in business data, so field values cannot collide with the delimiter, quote, or null marker.

 

4. Use QueryRunner to manage transactions (see my other blog post for details)
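
That post covers the wiring in detail; for completeness, here is a minimal sketch (not necessarily the author's exact setup) of one way to let commons-dbutils' QueryRunner join Spring-managed transactions. The bean names match the code above; the underlying gpDataSource bean is assumed to exist:

import javax.sql.DataSource;

import org.apache.commons.dbutils.QueryRunner;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.jdbc.datasource.DataSourceTransactionManager;
import org.springframework.jdbc.datasource.TransactionAwareDataSourceProxy;
import org.springframework.transaction.PlatformTransactionManager;

@Configuration
public class GpTransactionConfig {

    // Transaction manager referenced by @Transactional(transactionManager = "mesGpPlatformTransactionManager")
    @Bean
    public PlatformTransactionManager mesGpPlatformTransactionManager(DataSource gpDataSource) {
        return new DataSourceTransactionManager(gpDataSource);
    }

    // Wrap the DataSource so every getConnection() inside a transaction returns the
    // transaction-bound connection; QueryRunner then shares one connection with the
    // surrounding @Transactional method.
    @Bean
    public QueryRunner mesGpQueryRunner(DataSource gpDataSource) {
        return new QueryRunner(new TransactionAwareDataSourceProxy(gpDataSource));
    }
}

With the DataSource wrapped this way, the getConnection() call inside executeCopyFile also returns the transaction-bound connection (assuming the pool supports unwrap), so the DELETE and the COPY commit or roll back as one unit.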
