spark1.3.1默认不支持Hive,如果想要在SparkSQL中使用HiveContext需要自己编译spark的版本。
在Hive里定义一个数据库saledata和三个表tblDate、tblStock、tblStockDetail,并装载数据,具体命令如下:
-- Create the sales database (idempotent) and make it the current database.
CREATE DATABASE IF NOT EXISTS saledata;
USE saledata;
-- Date.txt is the calendar dimension: every day is tagged with the month,
-- week, quarter, etc. it belongs to.
-- Columns: date id, year-month, year, month, day of month, day of week,
-- week number, quarter, ten-day period, half-month.
-- Fixes vs. original: '//' is not a SQL comment marker (Hive uses '--'),
-- and the closing quote after '\n' was a typographic curly quote, which
-- breaks the string literal.
CREATE TABLE tblDate (
    dateID       string,
    theyearmonth string,
    theyear      string,
    themonth     string,
    thedate      string,
    theweek      string,
    theweeks     string,
    thequot      string,
    thetenday    string,
    thehalfmonth string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Stock.txt is the order header table.
-- Columns: order number, transaction location, transaction date
-- (dateID joins to tblDate.dateID).
-- Fixes vs. original: 'ROW RORMAT' typo (must be 'ROW FORMAT'), '//' is not
-- a SQL comment marker, and the closing quote after '\n' was a typographic
-- curly quote, which breaks the string literal.
CREATE TABLE tblStock (
    ordernumber string,
    locationid  string,
    dateID      string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- StockDetail.txt is the order line-item table.
-- Columns: order number, line number, item id, quantity, unit price, amount.
-- NOTE(review): price/amount are int — presumably whole currency units;
-- use DECIMAL if fractional amounts can occur (confirm against the data).
-- Fixes vs. original: '//' is not a SQL comment marker (Hive uses '--');
-- keyword casing normalized (original mixed 'STRING' and 'string').
CREATE TABLE tblStockDetail (
    ordernumber string,
    rownum      int,
    itemid      string,
    qty         int,
    price       int,
    amount      int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Load the three data files from the local filesystem into the tables.
-- (Original comment '//装在数据' had both an invalid '//' marker and a typo:
-- 装在 should be 装载 "load".)
LOAD DATA LOCAL INPATH '/home/mmicky/mboo/MyClass/doc/Spark SQL/data/Date.txt' INTO TABLE tblDate;
LOAD DATA LOCAL INPATH '/home/mmicky/mboo/MyClass/doc/Spark SQL/data/Stock.txt' INTO TABLE tblStock;
LOAD DATA LOCAL INPATH '/home/mmicky/mboo/MyClass/doc/Spark SQL/data/StockDetail.txt' INTO TABLE tblStockDetail;
//
package llf
import org.apache.log4j.{Level, Logger}
import org.apache.spark