1创建测试文件
home/centos/zip/a.txt
2 代码编写
package com.it18zhang.hdfs.mr.compress;
import com.hadoop.compression.lzo.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.*;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
import java.io.FileInputStream;
import java.io.FileOutputStream;
/**
* Created by Administrator on 2017/3/13.
*/
public class TestCompress {
public static void main(String[] args) throws Exception {
Class[] zipClasses = {
DeflateCodec.class,
GzipCodec.class,
BZip2Codec.class,
Lz4Codec.class,
//SnappyCodec.class
com.hadoop.compression.lzo.LzoCodec.class
};
for (Class c : zipClasses) {
zip(c);
}
System.out.println("=================");
for (Class c : zipClasses) {
unzip(c);
}
}
/**
* 压缩测试
*/
public static void zip(Class codecClass) throws Exception {
long start = System.currentTimeMillis();
//实例化对象
CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
//创建文件输出流,得到默认扩展名
FileOutputStream fos = new FileOutputStream("/home/centos/zip/b" + codec.getDefaultExtension());
//得到压缩流
CompressionOutputStream zipOut = codec.createOutputStream(fos);
IOUtils.copyBytes(new FileInputStream("/home/centos/zip/a.txt"), zipOut, 1024);
zipOut.close();
System.out.println(codecClass.getSimpleName() + " : " + (System.currentTimeMillis() - start));
}
/**
* 压缩测试
*/
public static void unzip(Class codecClass) throws Exception {
long start = System.currentTimeMillis();
//实例化对象
CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
//创建文件输出流,得到默认扩展名
FileInputStream fis = new FileInputStream("/home/centos/zip/b" + codec.getDefaultExtension());
//得到压缩流
CompressionInputStream zipIn = codec.createInputStream(fis);
IOUtils.copyBytes(zipIn, new FileOutputStream("/home/centos/zip/b" + codec.getDefaultExtension() + ".txt"), 1024);
zipIn.close();
System.out.println(codecClass.getSimpleName() + " : " + (System.currentTimeMillis() - start));
}
}
3 pom配置
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.it18zhang</groupId>
<artifactId>HdfsDemo</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins
</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.8</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<tasks>
<echo>---------开始复制jar包到共享目录下----------</echo>
<delete file="D:\downloads\bigdata\data\HdfsDemo-1.0-SNAPSHOT.jar"></delete>
<copy file="target/HdfsDemo-1.0-SNAPSHOT.jar" toFile="D:\downloads\bigdata\data\HdfsDemo.jar">
</copy>
</tasks>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.anarres.lzo</groupId>
<artifactId>lzo-hadoop</artifactId>
<version>1.0.0</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
</dependency>
</dependencies>
</project>
4 导出jar
5.设置服务器java vm的-agentlib:jdwp选项.
export HADOOP_CLIENT_OPTS=-agentlib:jdwp=transport=dt_socket,address=8888,server=y,suspend=y
6 安装snappy
sudo yum install -y snappy.x86_64
7
在pom.xml引入lzo依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.it18zhang</groupId>
<artifactId>HdfsDemo</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins
</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.8</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<tasks>
<echo>---------开始复制jar包到共享目录下----------</echo>
<delete file="D:\downloads\bigdata\data\HdfsDemo-1.0-SNAPSHOT.jar"></delete>
<copy file="target/HdfsDemo-1.0-SNAPSHOT.jar" toFile="D:\downloads\bigdata\data\HdfsDemo.jar">
</copy>
</tasks>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.anarres.lzo</groupId>
<artifactId>lzo-hadoop</artifactId>
<version>1.0.0</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
</dependency>
</dependencies>
</project>
8.在centos上安装lzo库
$>sudo yum -y install lzo
9.使用mvn命令下载工件中的所有依赖
进入pom.xml所在目录,运行cmd:
mvn -DoutputDirectory=./lib -DgroupId=com.it18zhang -DartifactId=HdfsDemo -Dversion=1.0-SNAPSHOT dependency:copy-dependencies
10.在lib下存放依赖所有的第三方jar
11.找出lzo-hadoop.jar + lzo-core.jar复制到hadoop的响应目录下。
$>cp lzo-hadoop.jar lzo-core.jar /soft/hadoop/shared/hadoop/common/lib
12.执行远程程序即可。