答题日志练习

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

1.
hdfs  dfs -mkdir -p /app/data/exam
hdfs dfs -put /root/data/answer_question.log /app/data/exam
2.
def main(args: Array[String]): Unit = {
	val conf = new SparkConf().setAppName("exam").setMaster("local[*]")
	val sc = new SparkContext(conf)
	val rdd = sc.textFile("hdfs://192.168.56.100:9000/app/data/exam/answer_question.log")
	rdd.map(line => {
		var datas = line.split(" ")
		var arr = datas(9).split("_")
		(arr(1),arr(2),arr(3).replace("r",""),data(10).split(",")(0)).productIterator.mkString("\t")
	}).saveAsTextFile("hdfs://192168.56.100/app/data/result")
}
3.
create_namespace 'exam'
create 'exam:analysis','accuracy','question'  # 两个列簇accuracy、question
4.
drop database ex_exam cascade;  # 删表加删库,谨慎
create database exam;
use exam;

create external table ex_exam_record (
topic_id string,
student_id string,
question_id string,
score float
)
row format delimited fields terminated by '\t'
location 'hdfs://192.168.56.100:9000/app/data/result';

create external table ex_exam_analysis (
student_id string,
total_score string,
question_count int,
accuracy float
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping"=":key,accuracy:total_score,accuracy:question_count,accuracy:accuracy")
tblproperties("hbase.table.name"="exam:analysis");

create external table ex_exam_question (
student_id string,
right string,
half string,
error float
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping"=":key,question:right,question:half,question:error")
tblproperties("hbase.table.name"="exam:analysis");
5.
insert into ex_exam_analysis
select student_id,
sum(score) as total_score,
count(topic_id) as question_count,
sum(score)/count(topic_id) as accuracy
from ex_exam_record er
group by student_id;
6.
with 
t1 as (select er.student_id,er.score,concat_wa(',',collect_set(question_id)) as question from ex_exam_record er group by er.student_id,er.score)
insert into table ex_exam_question
select student_id,
max(case when score=1.0 then question else '' end) as right,
max(case when score=0.5 then question else '' end) as half,
max(case when score=0 then question else '' end) as error
from t1
group by student_id

scan 'exam:analysis',{COLUMN=>'question'}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值