#1. load packages
library("DBI")
library("rJava")
library("RJDBC")
#2. build the Java classpath and start the JVM
# Collect the jars shipped with Hive and with Hadoop's common/tools libraries.
# The pattern is an anchored regular expression so that only names ending in
# ".jar" are picked up; a bare "jar" would also match entries such as
# "foo.jar.bak" or any directory whose name merely contains "jar".
hive.class.path <- list.files(
  path = "/usr/local/hadoop/hive-release/lib",
  pattern = "\\.jar$",
  full.names = TRUE
)
hadoop.common.lib.path <- list.files(
  path = "/usr/local/hadoop/hadoop-2.7.1/share/hadoop/common",
  pattern = "\\.jar$",
  full.names = TRUE
)
hadoop.tool.lib.path <- list.files(
  path = "/usr/local/hadoop/hadoop-2.7.1/share/hadoop/tools/lib/",
  pattern = "\\.jar$",
  full.names = TRUE
)
# Single character vector of jar paths handed to the JVM as its classpath.
cp <- c(hive.class.path, hadoop.common.lib.path, hadoop.tool.lib.path)
# Start the JVM. NOTE(review): the useSubjectCredsOnly=false option is
# presumably required for the Kerberos principal used in the connection
# URL -- confirm against the cluster's security setup.
.jinit(
  classpath = cp,
  parameters = "-Djavax.security.auth.useSubjectCredsOnly=false"
)
#3. load the driver and open the connection
# Driver: JDBC(driverClass, classPath) registers the Hive JDBC driver class
# found in the standalone Hive JDBC jar.
drv <- JDBC(
  driverClass = "org.apache.hive.jdbc.HiveDriver",
  classPath = "/usr/local/hadoop/hive-release/lib/hive-jdbc-1.2.1-standalone.jar"
)
# Connection: dbConnect(driver, url, user, password). Only the URL is given
# here; it carries the host, port, database and the Hive service principal
# (the x-ed out parts are placeholders to be filled in).
conn <- dbConnect(
  drv,
  url = "jdbc:hive2://xxx.xxx.x.x:port/xuserx;principal=hive/_HOST@xdomain;"
)
#4. query data
# Optional: route the session's MapReduce jobs to a specific queue
# (replace the placeholder with the real queue name, or drop this call).
dbSendUpdate(
  conn,
  statement = "set mapreduce.job.queuename=xxxx.xxxx.xxxx"
)
# List the tables visible through this connection; the result is left as a
# visible top-level value so it is printed when run interactively.
dbGetQuery(conn, statement = "show tables")