Implementing file operations with Java + Hadoop


	<dependencies>

		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-core</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-auth</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>1.2.17</version>
		</dependency>
		<dependency>
			<groupId>commons-logging</groupId>
			<artifactId>commons-logging</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>com.google.guava</groupId>
			<artifactId>guava</artifactId>
			<version>19.0</version>
		</dependency>
		<dependency>
			<groupId>commons-collections</groupId>
			<artifactId>commons-collections</artifactId>
			<version>3.2.2</version>
		</dependency>
		<dependency>
			<groupId>commons-cli</groupId>
			<artifactId>commons-cli</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>commons-lang</groupId>
			<artifactId>commons-lang</artifactId>
			<version>2.6</version>
		</dependency>
		<dependency>
			<groupId>commons-configuration</groupId>
			<artifactId>commons-configuration</artifactId>
			<version>1.9</version>
		</dependency>
		<dependency>
			<groupId>org.apache.avro</groupId>
			<artifactId>avro</artifactId>
			<version>1.7.7</version>
		</dependency>
		<dependency>
			<groupId>commons-io</groupId>
			<artifactId>commons-io</artifactId>
			<version>2.5</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/junit/junit -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
			<scope>test</scope>
		</dependency>

	</dependencies>
	<build>
		<plugins>
			<!-- Compiler configuration -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
					<encoding>UTF-8</encoding>
				</configuration>
			</plugin>
			<!-- Resource file handling (encoding) -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-resources-plugin</artifactId>
				<version>2.6</version>
				<configuration>
					<encoding>UTF-8</encoding>
				</configuration>
			</plugin>
			<!-- Copy the dependency jars into the lib directory -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-dependency-plugin</artifactId>
				<executions>
					<execution>
						<id>copy-dependencies</id>
						<phase>package</phase>
						<goals>
							<goal>copy-dependencies</goal>
						</goals>
						<configuration>
							<outputDirectory>${project.build.directory}/lib</outputDirectory>
							<overWriteReleases>false</overWriteReleases>
							<overWriteSnapshots>false</overWriteSnapshots>
							<overWriteIfNewer>true</overWriteIfNewer>
						</configuration>
					</execution>
				</executions>
			</plugin>

		</plugins>
	</build>

The pom.xml dependencies and build plugins are listed above.

Next, write the test class:


import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Hadoop test class
 * 
 * @author Administrator
 *
 */
public class hadoopTest {

	/**
	 * List all directories and files on HDFS
	 * 
	 * @throws IOException
	 */
	@org.junit.Test
	public void listFiles() throws IOException {
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://192.168.10.110:9001");
		FileSystem fs = FileSystem.newInstance(conf);
		// true: list files recursively; false: only the top level
		RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(new Path("/"), true);
		while (iterator.hasNext()) {
			LocatedFileStatus next = iterator.next();
			System.out.println(next.getPath());
		}
		System.out.println("----------------------------------------------------------");
		FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
		for (int i = 0; i < fileStatuses.length; i++) {
			FileStatus fileStatus = fileStatuses[i];
			System.out.println(fileStatus.getPath());
		}
	}

	/**
	 * Upload a local file to HDFS
	 */
	@org.junit.Test
	public void upload() throws IOException {
		long currentTimeMillis = System.currentTimeMillis();
		Configuration conf = new Configuration();
//		conf.set("fs.defaultFS", "hdfs://192.168.10.110:9001");
		conf.set("fs.default.name", "hdfs://192.168.10.110:9001");
		FileSystem fs = FileSystem.get(conf);
		fs.copyFromLocalFile(new Path("F:\\sso.jar"), new Path("/test1"));  //第一个为本地文件路径,第二个为hadoop路径
		long c = System.currentTimeMillis() - currentTimeMillis;
		System.out.println(c);
	}

	/**
	 * Download a file from HDFS to the local file system
	 */
	@org.junit.Test
	public void download() throws IOException {
		long currentTimeMillis = System.currentTimeMillis();
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://192.168.10.110:9001");
		conf.set("fs.default.name", "hdfs://192.168.10.110:9001");
//		conf.set("hadoop.home.dir", "F:\\lys\\javatools\\hadoop/bin"); // point this at your local Hadoop directory
		FileSystem fs = FileSystem.newInstance(conf);
		fs.copyToLocalFile(new Path("/test1/VMware-workstation-full-14.1.1.28517.exe"), new Path("D:\\home")); //
		System.out.println("成功");
		long c = System.currentTimeMillis() - currentTimeMillis;
		System.out.println(c);
	
	}

	/**
	 * Create a test1 directory under the HDFS root directory
	 * 
	 * @throws IOException
	 */
	@org.junit.Test
	public void mkdir() throws IOException {
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://192.168.10.110:9001");
		FileSystem fs = FileSystem.newInstance(conf);
		fs.mkdirs(new Path("/test1"));
		System.out.println("成功");
	}

}


Configuration is the client-side configuration object; you set the Hadoop properties on it (such as fs.defaultFS) before obtaining a FileSystem.
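As a minimal sketch (the class name ConfigurationSketch is just for illustration, reusing the 192.168.10.110:9001 NameNode address from the tests above): the same properties can either be set explicitly in code or picked up from a core-site.xml placed on the classpath, and since FileSystem implements Closeable, try-with-resources is a convenient way to release the connection:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ConfigurationSketch {

	public static void main(String[] args) throws IOException {
		// new Configuration() automatically loads core-site.xml / hdfs-site.xml
		// from the classpath (e.g. src/main/resources), so the explicit set()
		// below is only needed when no such file is provided.
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://192.168.10.110:9001");

		// FileSystem implements Closeable; try-with-resources closes it for us.
		try (FileSystem fs = FileSystem.newInstance(conf)) {
			System.out.println(fs.exists(new Path("/test1")));
		}
	}
}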

Watch out for one issue with the download operation. I deployed Hadoop on another server as a single-node instance, but to run the download from my local Windows machine, Hadoop also has to be installed locally, with the HADOOP_HOME environment variable configured and its bin directory added to PATH; that local bin directory additionally needs some helper tools.

In particular, the local bin directory must contain winutils.exe and the other accompanying Windows helper tools.

Only with this setup does the download operation work; otherwise it fails with an error saying that HADOOP_HOME and hadoop.home.dir are unset.
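If you would rather not change the system environment variables, the hadoop.home.dir JVM property can usually be set in code before the first Configuration/FileSystem call instead, for example as a static initializer in the test class. This is only a sketch; the path is a placeholder and must point at the local Hadoop home directory, i.e. the parent of the bin folder that contains winutils.exe:

	// Placeholder path: the Hadoop home directory, not its bin subdirectory.
	// Must run before the first Configuration/FileSystem call in the JVM.
	static {
		System.setProperty("hadoop.home.dir", "F:\\lys\\javatools\\hadoop");
	}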

These are some basic Java + Hadoop file operations. I have only just started learning this, so additions and corrections are welcome.
