# Spark SQL lab: build DataFrames from HDFS files, query via temp tables, then repeat with the spark-sql CLI

# Check running JVM processes, then start the Hadoop daemons (HDFS + YARN).
jps
cd /apps/hadoop/sbin
./start-all.sh

# Create a local working directory for the lab data.
mkdir -p /data/spark5

# Download the two sample data files (orders, order_items) from the lab file server.
cd /data/spark5
wget http://192.168.1.150:60000/allfiles/spark5/orders
wget http://192.168.1.150:60000/allfiles/spark5/order_items

# Upload both files into HDFS under /myspark5 so Spark can read them.
hadoop fs -mkdir /myspark5
hadoop fs -put /data/spark5/orders /myspark5
hadoop fs -put /data/spark5/order_items /myspark5

# Launch the interactive Spark Scala shell (provides `sc` automatically).
spark-shell

// Spark 1.x style: wrap the spark-shell-provided SparkContext (sc) in a
// SQLContext and import the implicits that enable RDD -> DataFrame (.toDF).
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._

// Schema of the orders file: four tab-separated string columns.
case class Orders(order_id:String,order_number:String,buyer_id:String,create_dt:String)

// Read /myspark5/orders from HDFS, split each line on tab, map each record to
// the Orders case class, and convert the RDD to a DataFrame.
val dforders = sc.textFile("/myspark5/orders").map(_.split('\t')).map(line=>Orders(line(0),line(1),line(2),line(3))).toDF()

// Register as a temporary table so it can be queried with SQL.
// (registerTempTable is the Spark 1.x API; Spark 2.x renamed it to
// createOrReplaceTempView.)
dforders.registerTempTable("orders")

// FIX: "show tables" returns rows of (tableName, isTemporary). The table name
// is column 0; the original code printed t(1), which is the isTemporary flag.
sqlContext.sql("show tables").map(t=>"tableName is:"+t(0)).collect().foreach(println)
sqlContext.sql("select order_id,buyer_id from orders").collect

# Back in the working directory, save the spark-shell commands above into a
# script file for checking/review.
cd /data/spark5/
vim CheckSpark1

// Build the order_items DataFrame the "programmatic schema" way:
// RDD[Row] + an explicitly constructed StructType.
import org.apache.spark.sql._
import org.apache.spark.sql.types._

// Load the raw order_items file from HDFS.
val rddorder_items = sc.textFile("/myspark5/order_items")
// Split each tab-separated line into a Row of three string fields.
val roworder_items = rddorder_items.map(_.split("\t")).map( p=>Row(p(0),p(1),p(2) ) )

// Space-separated column names; every column is a nullable StringType.
val schemaorder_items = "item_id order_id goods_id"
val schema = StructType(schemaorder_items.split(" ").map(fieldName=>StructField(fieldName,StringType,true)) )

// FIX: applySchema has been deprecated since Spark 1.3; createDataFrame takes
// the same (RDD[Row], StructType) arguments and is the supported replacement.
// (The file already relies on 1.3+ features such as toDF(), so this is safe.)
val dforder_items = sqlContext.createDataFrame(roworder_items, schema)
dforder_items.registerTempTable("order_items")

// FIX: "show tables" rows are (tableName, isTemporary); the name is column 0,
// not column 1 (column 1 is the isTemporary boolean).
sqlContext.sql("show tables").map(t=>"tableName is:"+t(0)).collect().foreach(println)
sqlContext.sql("select order_id,goods_id from order_items ").collect

// Join the two temp tables on order_id: which goods did each buyer purchase.
sqlContext.sql("select orders.buyer_id, order_items.goods_id from order_items  join orders on order_items.order_id=orders.order_id ").collect

# Save the programmatic-schema commands, then switch to the Spark SQL CLI.
cd /data/spark5/
vim CheckSpark2

# Start the spark-sql interactive shell (HiveQL-compatible).
spark-sql

-- Recreate the two datasets as managed, tab-delimited text tables so the
-- same queries can be run from the spark-sql CLI.
create table orders (order_id string,order_number string,buyer_id string,create_dt string)
row format delimited fields terminated by '\t'  stored as textfile;

create table order_items(item_id string,order_id string,goods_id string)
row format delimited fields terminated by '\t'  stored as textfile;

-- "local inpath" reads from the client's local filesystem (not HDFS).
load data local inpath '/data/spark5/orders' into table orders;
load data local inpath '/data/spark5/order_items' into table order_items;

-- Sanity-check that both loads succeeded.
select * from orders;
select * from order_items;

-- Same buyer -> goods join as in the spark-shell session, now on these tables.
select orders.buyer_id, order_items.goods_id from order_items join orders on order_items.order_id=orders.order_id;

# Run spark-sql non-interactively: -e executes a single statement and exits;
# --num-executors 100 requests up to 100 YARN executors for the job.
# Output is redirected into results.txt.
cd /data/spark5/
spark-sql --num-executors 100 -e "show tables" > results.txt

# Inspect the captured query output.
cat /data/spark5/results.txt
  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值