-- Hive session settings that enable LZO compression, followed by a table rewrite.

-- Final job output: compressed with the lzop codec, BLOCK compression type.
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec;
set mapred.output.compression.type=BLOCK;

-- Map output: compressed with the LZO codec.
set mapred.map.output.compression.codec=com.hadoop.compression.lzo.LzoCodec;
set mapred.compress.map.output=true;

-- Hive intermediate data: compression enabled with an explicitly named codec.
-- NOTE(review): per the accompanying notes, leaving the codec unset fails with
-- "java.io.EOFException: Premature EOF from inputStream", presumably because
-- intermediate data then falls back to the default (.deflate) format -- confirm.
set hive.intermediate.compression.codec=org.apache.hadoop.io.compress.LzoCodec;
set hive.exec.compress.intermediate=true;

-- Replace the contents of XXX with every row and column of YYY.
INSERT OVERWRITE TABLE XXX
SELECT *
FROM YYY;
如果不设置 hive.intermediate.compression.codec=org.apache.hadoop.io.compress.LzoCodec,则会报错:"java.io.EOFException: Premature EOF from inputStream"。原因应该是中间数据会按默认的格式(.deflate)压缩,导致读入失败。
参考: