鉴于网上流传的用 Java 写 Parquet 文件的代码所生成的文件 Hive 无法识别,特意写了一个对应的 demo,有需要的可以自行取用。
/**
 * Demo that writes a three-row Parquet file to HDFS through Hive's
 * {@code MapredParquetOutputFormat} and {@code ParquetHiveSerDe}.
 *
 * <p>Each row is a {@link MyRow} serialized with a reflection-based object inspector;
 * the Parquet schema is built in {@link #getMessageTypeFromCode()}.
 */
public class WriteParquetFile1 {

    /** HDFS endpoint (placeholder); used both as {@code fs.defaultFS} and as the FileSystem URI. */
    private static final String HDFS_URL = "hdfs://XXXX:8020";

    /** Target file (placeholder); an existing file at this path is deleted before writing. */
    private static final String OUTPUT_FILE = "/XXX/test.parquet";

    /**
     * Connects to HDFS as user {@code admin}, removes any previous output file,
     * writes the demo rows and prints a confirmation once everything is closed.
     *
     * @param args unused
     * @throws Exception if connecting to HDFS, serializing a row or writing the file fails
     */
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.set("fs.defaultFS", HDFS_URL);

        // try-with-resources: the file system is closed even when writing fails.
        try (FileSystem fs = FileSystem.get(new URI(HDFS_URL), conf, "admin")) {
            Path outputPath = new Path(OUTPUT_FILE);
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }
            writeRows(fs, conf, outputPath);
        }
        System.out.println("write success .");
    }

    /**
     * Serializes the demo rows and writes them to {@code outputPath}.
     *
     * @param fs         file system handed to the output format
     * @param conf       job configuration; {@code parquet.hive.schema} is set on it here
     * @param outputPath destination file
     * @throws Exception if serialization or writing fails
     */
    // NOTE(review): OutputFormat/RecordWriter are left raw because the key/value type
    // parameters of MapredParquetOutputFormat are not visible from this file.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static void writeRows(FileSystem fs, JobConf conf, Path outputPath) throws Exception {
        StructObjectInspector inspector = (StructObjectInspector) ObjectInspectorFactory
                .getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        OutputFormat outFormat = new MapredParquetOutputFormat();

        // Per the notes accompanying this demo, this key must be set before the record
        // writer is created; otherwise a NullPointerException is reported.
        conf.set("parquet.hive.schema", getMessageTypeFromCode().toString());

        RecordWriter writer = outFormat.getRecordWriter(fs, conf, outputPath.toString(), Reporter.NULL);
        try {
            MyRow[] rows = {
                new MyRow("1", "张三", 20),
                new MyRow("2", "李四", 22),
                new MyRow("3", "王五", 30),
            };
            for (MyRow row : rows) {
                // Per the notes accompanying this demo, the key must be a null cast to
                // NullWritable; passing NullWritable.get() is reported to fail with a cast error.
                writer.write((NullWritable) null, serde.serialize(row, inspector));
            }
        } finally {
            // Close the writer even if a row fails, so the output stream is not leaked.
            writer.close(Reporter.NULL);
        }
    }

    /**
     * Row type handed to the reflection object inspector. It implements {@code Writable}
     * but both Writable operations are unsupported and always throw.
     */
    static class MyRow implements Writable {
        String id;
        String name;
        int age;

        MyRow(String id, String name, int age) {
            this.id = id;
            this.name = name;
            this.age = age;
        }

        /** Always throws; reading a row from a stream is not supported. */
        @Override
        public void readFields(DataInput arg0) {
            throw new UnsupportedOperationException("no read");
        }

        /** Always throws; writing a row to a stream is not supported. */
        @Override
        public void write(DataOutput arg0) {
            throw new UnsupportedOperationException("no write");
        }
    }

    /**
     * Builds the Parquet schema named {@code demo}: required UTF8 binary columns
     * {@code id} and {@code name}, and a required INT32 column {@code age}.
     *
     * @return the message type describing one row
     */
    private static MessageType getMessageTypeFromCode() {
        return Types.buildMessage()
                .required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("id")
                .required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("name")
                .required(PrimitiveType.PrimitiveTypeName.INT32).named("age")
                .named("demo");
    }
}

中间有两个坑需要说明
1:必须通过 conf.set("parquet.hive.schema", messageType.toString()); 设置该配置项,否则会报空指针异常(NullPointerException)。
2:(NullWritable)null 不能写成 NullWritable.get(),否则会报类型转换异常(cannot be cast,即 ClassCastException)。