使用Hive编写MapReduce程序

最新推荐文章于 2024-04-11 08:00:00 发布

magina507

最新推荐文章于 2024-04-11 08:00:00 发布

阅读量4.4k

点赞数 1

分类专栏：大数据处理、实验报告　文章标签：学习笔记、大数据

本文链接：https://blog.csdn.net/magina507/article/details/51649890

版权

大数据处理同时被 2 个专栏收录

14 篇文章 0 订阅

订阅专栏

实验报告

14 篇文章 0 订阅

订阅专栏

实验题目

使用Hive编写MapReduce程序

实验要求

在Eclipse中进行Java编码：

要求通过代码连接Hive服务器

通过代码建立数据库

通过代码创建数据表，并向其加载数据。

实验步骤

1.安装实验环境，启动Hadoop所有服务。

2.启动Hive服务器。

进入hive_home路径后，使用bin/hive --service hiveserver命令启动服务器。

3.进入Eclipse新建项目，并添加相关关联包。

其中jar文件为Hadoop_Home下所有的jar文件，以及Hive_Home/lib下所有的jar文件。

除此之外，还需要将Hive_Home/conf也添加进入。

这一步需要在Java项目下新建一个conf文件夹，然后将Hive_Home/conf下的所有文件复制到新建的conf文件夹中，再将该文件夹添加到项目的构建路径中。

最下面的conf文件夹为新建的文件夹，内部的文件是从Hive_Home/conf中复制过来的。

4.编写源代码通过Thrift客户端连接Hive服务器

import org.apache.hadoop.hive.service.ThriftHive;
import org.apache.hadoop.hive.service.ThriftHive.Client;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;
/**
 * Minimal demo: opens a Thrift connection to a Hive server, reports whether
 * the connection succeeded, and closes the socket again.
 */
public class Hive_demo_01 {
    /** Socket created by {@link #getClient}; static so that {@code main} can close it. */
    static TSocket transport;

    /**
     * Opens a socket to the given Hive server and wraps it in a Thrift client.
     *
     * @param hiveServer host name of the Hive server
     * @param hivePort   port the Hive server listens on
     * @return a connected client, or {@code null} if the connection could not
     *         be established (the exception is printed to stderr)
     */
    private static Client getClient(String hiveServer, Integer hivePort) {
        // Deliberately huge value handed to TSocket.setTimeout so that
        // long-running statements are not cut off by the socket.
        final int SOME_BIG_NUMBER = 99999999;
        try {
            transport = new TSocket(hiveServer, hivePort);
            transport.setTimeout(SOME_BIG_NUMBER);
            transport.open();
            TBinaryProtocol protocol = new TBinaryProtocol(transport);
            Client client = new ThriftHive.Client(protocol);
            System.out.println("Connection is established");
            return client;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Connects to the Hive server on localhost:10000 and disconnects again.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String HIVE_SERVER = "localhost";
        Integer HIVE_PORT = 10000;
        try {
            Client client = getClient(HIVE_SERVER, HIVE_PORT);
            if (client == null) {
                System.err.println("Could not connect to Hive server at "
                        + HIVE_SERVER + ":" + HIVE_PORT);
            }
        } finally {
            // transport stays null if the TSocket could not even be created.
            if (transport != null) {
                transport.close();
            }
        }
    }
}

运行上述代码，如果连接成功且没有报错，终端会打印Connection is established，说明程序已成功连接到Hive服务器。

5.通过源代码创建jdbc_demo数据库。

创建数据库。

程序运行后终端窗口显示的内容。

6.编写源代码创建sample_data数据表，并向其加载数据。

package com.hive.demo;

import java.util.List;
import org.apache.hadoop.hive.service.ThriftHive;
import org.apache.hadoop.hive.service.ThriftHive.Client;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.hadoop.hive.service.HiveServerException;
import org.apache.thrift.transport.TSocket;

/**
 * Demo that connects to a Hive server through the Thrift client, lists the
 * existing tables, creates the {@code sample_data} table and loads a local
 * text file into it.
 */
public class Hive_demo_01 {
	/** Socket created by {@link #getClient}; static so that {@code main} can close it. */
	static TSocket transport;

	/**
	 * Opens a socket to the given Hive server and wraps it in a Thrift client.
	 *
	 * @param hiveServer host name of the Hive server
	 * @param hivePort   port the Hive server listens on
	 * @return a connected client, or {@code null} if the connection could not
	 *         be established (the exception is printed to stderr)
	 */
	private static Client getClient(String hiveServer, Integer hivePort) {
		// Deliberately huge value handed to TSocket.setTimeout so that
		// long-running statements are not cut off by the socket.
		final int SOME_BIG_NUMBER = 99999999;
		try {
			transport = new TSocket(hiveServer, hivePort);
			transport.setTimeout(SOME_BIG_NUMBER);
			transport.open();
			TBinaryProtocol protocol = new TBinaryProtocol(transport);
			Client client = new ThriftHive.Client(protocol);
			System.out.println("Connection is established");
			return client;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Runs one statement on the server and reports whether it succeeded.
	 *
	 * @param c1        connected client
	 * @param statement statement text passed to {@code Client.execute}
	 * @return {@code true} if the statement ran without an exception,
	 *         {@code false} otherwise (the exception is printed to stderr)
	 */
	private static boolean runStatement(Client c1, String statement) {
		try {
			c1.execute(statement);
			return true;
		} catch (HiveServerException e) {
			e.printStackTrace();
		} catch (TException e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Prints the result rows of {@code show tables}.
	 *
	 * @param c1 connected client
	 * @return the same client, so that calls can be chained
	 */
	private Client show_tables(Client c1) {
		if (!runStatement(c1, "show tables")) {
			// Nothing to fetch when the statement itself failed.
			return c1;
		}
		List<String> tables;
		try {
			tables = c1.fetchAll();
		} catch (TException e) {
			e.printStackTrace();
			return c1;
		}
		System.out.println(" *** The Tables List *** ");
		if (tables != null) {
			for (String table : tables) {
				System.out.println(table);
			}
		}
		System.out.println(" -------------------------------- ");
		return c1;
	}

	/**
	 * Creates the {@code sample_data} table with a single string column.
	 * The success banner is printed only when the statement succeeded.
	 *
	 * @param c1 connected client
	 * @return the same client, so that calls can be chained
	 */
	private Client create_tables(Client c1) {
		if (runStatement(c1, "create table sample_data(name string)stored as textfile")) {
			System.out.println(" *** sample_data tables is created *** ");
		} else {
			System.err.println(" *** creating sample_data failed *** ");
		}
		System.out.println(" -------------------------------- ");
		return c1;
	}

	/**
	 * Loads the local sample file into the given table. The file path is
	 * hard-coded and must be adapted to the machine the Hive server runs on.
	 *
	 * @param c1       connected client
	 * @param tbl_name name of the target table; it is concatenated into the
	 *                 statement as-is, so it must come from a trusted source
	 * @return the same client, so that calls can be chained
	 */
	private Client load_data(Client c1, String tbl_name) {
		String statement =
				"load data local inpath '/home/wcbdd/Desktop/week5/sample.txt' into table " + tbl_name;
		if (runStatement(c1, statement)) {
			System.out.println(" *** loaded data into " + tbl_name + " *** ");
		} else {
			System.err.println(" *** loading data into " + tbl_name + " failed *** ");
		}
		System.out.println(" -------------------------------- ");
		return c1;
	}

	/**
	 * Connects to the Hive server on localhost:10000, then lists tables,
	 * creates {@code sample_data}, lists tables again and loads the data.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String HIVE_SERVER = "localhost";
		Integer HIVE_PORT = 10000;
		try {
			Client client = getClient(HIVE_SERVER, HIVE_PORT);
			if (client == null) {
				// getClient already printed the cause; nothing more can be done.
				System.err.println("Could not connect to Hive server at "
						+ HIVE_SERVER + ":" + HIVE_PORT);
				return;
			}
			Hive_demo_01 obj = new Hive_demo_01();
			client = obj.show_tables(client);
			System.out.println(" Before Creating the table sample_data ");
			client = obj.create_tables(client);
			System.out.println(" After Creating the table sample_data ");
			client = obj.show_tables(client);
			System.out.println(" loading data into sample_data ");
			client = obj.load_data(client, "sample_data");
		} finally {
			// Always release the socket, even when a step above throws;
			// transport stays null if the TSocket could not even be created.
			if (transport != null) {
				transport.close();
			}
		}
	}
}

需要将load_data方法中数据文件的路径（/home/wcbdd/Desktop/week5/sample.txt）更改为本机上sample.txt文件的实际路径。