/*******************old hadoop api*************************/
// Old (mapred) API: DeprecatedLzoTextInputFormat for reading, JobConf + TextOutputFormat for writing.
val confHadoop = new JobConf()
confHadoop.set("mapred.output.compress", "true")
confHadoop.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec")
val textFile = sc.hadoopFile(args(0), classOf[DeprecatedLzoTextInputFormat], classOf[LongWritable], classOf[Text], 1)
textFile.saveAsHadoopFile(args(1), classOf[LongWritable], classOf[Text], classOf[TextOutputFormat[LongWritable, Text]], confHadoop)

/*******************new hadoop api*************************/
// New (mapreduce) API: LzoTextInputFormat for reading; the output codec is set on the Job's Configuration.
val job = new Job()
job.setOutputFormatClass(classOf[TextOutputFormat[LongWritable, Text]])
job.getConfiguration().set("mapred.output.compress", "true")
job.getConfiguration().set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec")
val textFile = sc.newAPIHadoopFile(args(0), classOf[LzoTextInputFormat], classOf[LongWritable], classOf[Text], job.getConfiguration())
textFile.saveAsNewAPIHadoopFile(args(1), classOf[LongWritable], classOf[Text], classOf[TextOutputFormat[LongWritable, Text]], job.getConfiguration())

/*******************textFile*************************/
// textFile/saveAsTextFile: the LZO codec is resolved from io.compression.codecs on read and passed explicitly on write.
val textFile = sc.textFile(args(0), 1)
textFile.saveAsTextFile(args(1), classOf[LzopCodec])
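All three variants assume an existing SparkContext sc and the hadoop-lzo classes on the classpath. As a rough, self-contained sketch of the read side with the new API (the input format normally lives in com.hadoop.mapreduce in hadoop-lzo builds; the package names and the count action here are illustrative assumptions, not from the original snippet):

import com.hadoop.mapreduce.LzoTextInputFormat
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapreduce.Job

val job = new Job()
val pairs = sc.newAPIHadoopFile(args(0), classOf[LzoTextInputFormat],
  classOf[LongWritable], classOf[Text], job.getConfiguration())

// Keys are byte offsets into the file; the line content is in the Text value.
val lines = pairs.map { case (_, text) => text.toString }
println(lines.count())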
spark.executor.extraLibraryPath=/usr/lib/native/
spark.executor.extraClassPath=/usr/lib/hadoop/lib/hadoop-lzo.jar
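Both properties are executor-side settings, so besides spark-defaults.conf they can also be set on the SparkConf before the SparkContext is created. A minimal sketch using the paths above (adjust them to your installation; the app name is arbitrary):

import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setAppName("lzo-example")  // arbitrary app name
  .set("spark.executor.extraLibraryPath", "/usr/lib/native/")
  .set("spark.executor.extraClassPath", "/usr/lib/hadoop/lib/hadoop-lzo.jar")
val sc = new SparkContext(conf)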
--driver-class-path /usr/lib/hadoop/lib/hadoop-lzo.jar
--driver-library-path /usr/lib/native
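The driver flags have to be passed at launch time because the driver JVM's classpath and java.library.path cannot be changed once it is running (in client mode, setting spark.driver.extraClassPath through SparkConf inside the program is too late), e.g. spark-shell --driver-class-path /usr/lib/hadoop/lib/hadoop-lzo.jar --driver-library-path /usr/lib/native.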
hql("set io.compression.codecs=com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec")
hql("set io.compression.codec.lzo.class=com.hadoop.compression.lzo.LzoCodec")
hql("set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec")
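These hql calls come from the early Spark SQL HiveContext API (newer versions use sql instead). A sketch of where they sit, with hypothetical table names purely for illustration:

import org.apache.spark.sql.hive.HiveContext

val hiveContext = new HiveContext(sc)
import hiveContext._  // brings hql(...) into scope on old Spark versions

hql("set io.compression.codecs=com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec")
hql("set io.compression.codec.lzo.class=com.hadoop.compression.lzo.LzoCodec")
hql("set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec")
// Hypothetical table names; depending on the Hive setup, hive.exec.compress.output=true may also be needed.
hql("INSERT OVERWRITE TABLE lzo_result SELECT * FROM src")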