Writing HBase data to HDFS in Parquet format with MapReduce

This post shows how to export the contents of an HBase table to HDFS, stored as Parquet. The job is implemented in Java and covers setting the scan conditions, configuring the Parquet output format and schema, and using TableMapReduceUtil to map over the table.

package com.sitech;

import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.parquet.hadoop.example.GroupWriteSupport;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;

public class HbaseToHdfs {

    /**
     * Init properties.
     *
     * @param path the path to the properties file
     * @return the loaded properties
     * @throws Exception if the file cannot be opened
     */
    public static Properties init(final String path) throws Exception {
        Properties properties = new Properties();
        InputStream inputStream = new FileInputStream(new File(path));
        try {
            properties.load(inputStream);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            inputStream.close();
        }
        return properties;
    }

    /**
     * Gets the Parquet schema for the given columns.
     *
     * @param columns the column names
     * @return the schema as a Parquet message-type string
     */
    public static String getSchema(final List<String> columns) {
        // Assumed implementation (the source is truncated at this method):
        // one optional UTF8 (binary) field per HBase column.
        StringBuilder schema = new StringBuilder("message hbase_record {\n");
        for (String column : columns) {
            schema.append("  optional binary ").append(column).append(" (UTF8);\n");
        }
        schema.append("}");
        return schema.toString();
    }
}
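The listing breaks off before the mapper and driver that the summary above describes. As a hedged sketch of what that remainder typically looks like, here are a TableMapper subclass and a map-only main() wired together with TableMapReduceUtil and ParquetOutputFormat. The table name "my_table", column family "cf", column "col1", and the output path taken from the command line are hypothetical placeholders, not values from the original post; MessageTypeParser is one extra import the sketch needs.

// Additional members of HbaseToHdfs; also requires:
// import org.apache.parquet.schema.MessageTypeParser;

/**
 * Map-only mapper: turns each HBase Result into a Parquet Group.
 */
public static class HbaseToParquetMapper extends TableMapper<Void, Group> {

    private SimpleGroupFactory groupFactory;

    @Override
    protected void setup(Context context) {
        // Build groups against the schema registered in the job configuration.
        groupFactory = new SimpleGroupFactory(
                GroupWriteSupport.getSchema(context.getConfiguration()));
    }

    @Override
    protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
            throws IOException, InterruptedException {
        Group group = groupFactory.newGroup();
        // "cf" and "col1" are assumed names; adapt to the real table layout.
        byte[] raw = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("col1"));
        group.append("col1", raw == null ? "" : Bytes.toString(raw));
        context.write(null, group);
    }
}

public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Register the Parquet schema so GroupWriteSupport can find it at write time.
    GroupWriteSupport.setSchema(
            MessageTypeParser.parseMessageType(getSchema(Lists.newArrayList("col1"))),
            conf);

    Job job = Job.getInstance(conf, "HbaseToHdfs");
    job.setJarByClass(HbaseToHdfs.class);

    // Scan conditions: large scanner caching, no block cache -- the usual MR settings.
    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(
            "my_table",                  // hypothetical source table
            scan,
            HbaseToParquetMapper.class,
            Void.class,
            Group.class,
            job);

    // Map-only job: mappers write Parquet files directly.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(ParquetOutputFormat.class);
    ParquetOutputFormat.setWriteSupportClass(job, GroupWriteSupport.class);

    Path out = new Path(otherArgs[0]);   // hypothetical: output dir from the CLI
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out)) {
        fs.delete(out, true);            // clear a stale output directory
    }
    ParquetOutputFormat.setOutputPath(job, out);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Setting the reducer count to zero lets each mapper write its Parquet output directly, which is usually what you want for a straight table export.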