-- Create the partitioned table; partitioned by the shop name column.
create table dianpu(month string, price int) partitioned by (name string) row format delimited fields terminated by ',';
-- Create a plain (non-partitioned) staging table.
create table dianpu_tmp(name string, month string, price int) row format delimited fields terminated by ',';
-- Load the local data file into the staging table.
-- (Original wrote "dianpu1", a table that is never created; the staging table is dianpu_tmp.)
load data local inpath '/opt/zuoye727' into table dianpu_tmp;
-- Dynamic partitioning must be enabled BEFORE the partitioned insert runs
-- (the original set these after the insert, and misspelled "nonstrict" as "nostrick").
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- Copy staging rows into the partitioned table; the dynamic partition
-- column (name) must come last in the SELECT list.
insert into dianpu partition(name) select month, price, name from dianpu_tmp;
-- Highest sale amount for each month.
select month, max(price) from dianpu group by month;
-- Number of sales records for each shop.
select name, count(*) from dianpu group by name;
# Start the Hadoop daemons (HDFS + YARN).
start-all.sh
# Start MySQL (backing store for the Hive metastore).
service mysqld start
# Start HiveServer2 in the background.
./hive --service hiveserver2 &
# Confirm the server is listening (HiveServer2 defaults to port 10000).
netstat -tunl
# Connect with beeline. The "!connect" line and the two "root" lines below
# are typed at the beeline prompt (username, then password) — they are not
# shell commands.
./beeline
!connect jdbc:hive2://hdp2:10000
root
root
-- List existing tables.
show tables;
-- Register the jar that contains the UDF implementation (session-scoped).
add jar /opt/MyUDF-hive.jar;
-- Create a TEMPORARY function; it exists only for this session.
create temporary function getUDF as 'com.bawei.MyUdf.MyUDF';
-- Create a PERMANENT function.
-- Method 1: add the jar from the local Hive lib directory, then create it.
-- NOTE(review): a permanent function normally needs its jar reachable by
-- every session (e.g. "USING JAR 'hdfs://...'") — confirm this local path
-- is sufficient for your deployment.
add jar /opt/apache-hive-1.2.2-bin/lib/hive-udf2.jar;
create function getjson as 'com.bawei.hive.JsonToString';
-- Verify the functions are registered.
show functions;
-- Create a test table for exercising the UDF.
-- (The original lines carried pasted beeline prompt text
-- "0: jdbc:hive2://hdp2:10000>", which would break a script; stripped here.)
create table people(id int, name string, adder string, ufd int)
row format delimited fields terminated by ',';
-- Load the local sample file into the table.
load data local inpath '/opt/udf' into table people;
-- Inspect the loaded rows.
select * from people;
-- Apply the temporary UDF to the ufd column.
select *, getudf(ufd) from people;
# Inspect permanent functions directly in the MySQL-backed metastore.
# (Original had two typos: "starrt" for "start", and "FUNCE" for the
# metastore table FUNCS.)
service mysqld start
mysql -uroot -p
# The following statements are typed inside the mysql client:
use hive;
show tables;
# Permanent Hive functions are recorded in the FUNCS metastore table.
select * from FUNCS;
# --- Service startup ---
# Start ZooKeeper (required by this Kafka version).
./zookeeper-server-start.sh -daemon ../config/zookeeper.properties
# Start the Kafka broker.
./kafka-server-start.sh -daemon ../config/server.properties
# List existing topics.
# (A stray unclosed ```shell markdown fence sat here in the original; removed.)
./kafka-topics.sh --list --zookeeper hdp2:2181
# Create a topic with one partition and one replica.
./kafka-topics.sh --create --zookeeper hdp2:2181 --replication-factor 1 --partitions 1 --topic test
# Console producer writing to topic_app_startup.
./kafka-console-producer.sh --broker-list hdp2:9092 --topic topic_app_startup
# Console consumer reading the same topic.
# NOTE(review): the producer targets hdp2:9092 while the consumer targets
# hdp1:9092 — confirm both hosts are brokers of the same cluster, otherwise
# unify the host names.
./kafka-console-consumer.sh --bootstrap-server hdp1:9092 --topic topic_app_startup
# Flume agent: consume from Kafka and sink to HDFS per conf/kafka-hdfs.conf.
bin/flume-ng agent --conf conf --conf-file conf/kafka-hdfs.conf --name a1 -Dflume.root.logger=INFO,console
# Start HiveServer2 (or use the hive CLI directly), then connect via beeline.
./hive --service hiveserver2 &
./beeline
!connect jdbc:hive2://hdp2:10000
# Typed inside the beeline session:
# list databases, then switch to the app-logs database.
show databases;
use applogsdb;