1) Hive 启动命令集合
【hive元数据初始化和更新】
# Initialize the Hive metastore schema in MySQL (first install only).
schematool -dbType mysql -initSchema
# Upgrade an existing metastore schema after a Hive version upgrade.
schematool -dbType mysql -upgradeSchema
# Start the Hive metastore service in the background.
# FIX: the original was "nohup hive --service metastore & >> nohup.out" —
# the "&" backgrounds the command BEFORE the redirection is attached, so
# nothing was ever written to the log file. The redirection must come
# before the trailing "&"; 2>&1 also captures stderr.
nohup hive --service metastore >> nohup.out 2>&1 &
# Start the hiveserver2 service in the background (same fix as above).
nohup hive --service hiveserver2 >> hiveserver2.out 2>&1 &
# Enter the Hive client and connect to hiveserver2.
beeline
!connect jdbc:hive2://node03:10000
# Shut down: find the Hive processes, then kill them by pid.
# NOTE(review): prefer a plain TERM (kill <pid>) first; use -9 only if the
# process does not exit.
ps -ef | grep hive
kill -9 pid
2)impala 服务和客户端启动
1. 启动hive的metastore服务
node03机器启动hive的metastore服务
cd /export/servers/hive-1.1.0-cdh5.14.0
# Start the metastore service in the background.
nohup bin/hive --service metastore &
# Start hiveserver2 in the background.
# FIX: the original was "bin/hive -- service hiveserver2" (stray space),
# which hive does not parse as the --service option, so hiveserver2 was
# never actually started.
nohup bin/hive --service hiveserver2 &
2. 启动主节点node03的impala
service impala-state-store start
service impala-catalog start
service impala-server start
3. 从节点node01与node02启动impala-server
service impala-server start
# 成功启动
成功启动的结果如下:
[root@node03 hive-1.1.0-cdh5.14.0]# impala-shell
Starting Impala Shell without Kerberos authentication
Connected to node03.hadoop.com:21000
Server version: impalad version 2.11.0-cdh5.14.0 RELEASE (build d68206561bce6b26762d62c01a78e6cd27aa7690)
***********************************************************************************
Welcome to the Impala shell.
(Impala Shell v2.11.0-cdh5.14.0 (d682065) built on Sat Jan 6 13:27:16 PST 2018)
To see more tips, run the TIP command.
***********************************************************************************
[node03.hadoop.com:21000] >
3)Kafka启动服务和客户端
# How to start a Kafka cluster.
## Must be run on all three nodes.
# FIX: the daemon flag of kafka-server-start.sh is "-daemon", not "-d" —
# with "-d" the script does not detach (the flag is not recognized).
bin/kafka-server-start.sh -daemon config/server.properties
## How to shut down the Kafka cluster (also run on every node).
bin/kafka-server-stop.sh
# How to list all topics in the cluster.
bin/kafka-topics.sh --zookeeper node01:2181,node02:2181,node03:2181 --list
# Create a topic "mytopic" with 3 partitions and 2 replicas.
bin/kafka-topics.sh --zookeeper node01:2181,node02:2181,node03:2181 --create --topic mytopic --replication-factor 2 --partitions 3
# Show detailed information about the "mytopic" topic.
bin/kafka-topics.sh --zookeeper node01:2181,node02:2181,node03:2181 --describe --topic mytopic
# List the current consumer groups (new-consumer API, queried via the brokers).
bin/kafka-consumer-groups.sh --new-consumer --bootstrap-server node01:9092,node02:9092,node03:9092 --list
# Produce data with the console producer.
bin/kafka-console-producer.sh --broker-list node01:9092,node02:9092,node03:9092 --topic mytopic
# Consume data with the console consumer, starting from the earliest offset.
bin/kafka-console-consumer.sh --zookeeper node01:2181,node02:2181,node03:2181 --topic mytopic --from-beginning
4)redis配置、启动和关闭命令
#修改redis配置
cd /export/server/redis-3.2.8/
mkdir -p /export/server/redis-3.2.8/log
mkdir -p /export/server/redis-3.2.8/data
vim redis.conf
# 修改第61行,接收的访问地址
bind node1.itcast.cn
# 修改第128行,后台守护执行
daemonize yes
# 修改第163行,日志目录
logfile "/export/server/redis-3.2.8/log/redis.log"
# 修改第247行,数据持久化目录
dir /export/server/redis-3.2.8/data
#启动redis
cd /export/server/redis-3.2.8/
bin/redis-server redis.conf
或
/usr/local/redis/bin/redis-server /usr/local/redis/etc/redis.conf
#关闭redis
bin/redis-cli -h node1.itcast.cn shutdown
#连接redis客户端
cd /export/server/redis-3.2.8/
bin/redis-cli -h node1.itcast.cn
5)presto 的启动命令
每台机器都启动
/export/server/presto/bin/launcher start
单个客户端
/export/server/presto/bin/presto --server localhost:8090 --catalog hive --schema default
6) sqoop 导入脚本命令集
#!/bin/bash
# Daily sqoop import of one day of chat data from MySQL (database "nev")
# into the itcast_ods Hive database, stored as ZLIB-compressed ORC via
# HCatalog.
#
# Usage: <script> [YYYY-MM-DD]
#   Without an argument the target date defaults to yesterday.
#
# Fixes vs the original:
#   * set -euo pipefail so a failed import aborts instead of continuing
#   * $(...) instead of backticks; all expansions quoted
#   * "${1:-}" so the missing-argument check is safe under set -u
set -euo pipefail

# Path of the sqoop executable.
# NOTE(review): despite the name, this points at the binary itself, not a
# home directory.
SQOOP_HOME=/usr/bin/sqoop

# Target date: first CLI argument, or yesterday when omitted.
if [[ "${1:-}" == "" ]]; then
  TD_DATE=$(date -d '1 days ago' "+%Y-%m-%d")
else
  TD_DATE=$1
fi

# Source tables are sharded per month by name: <base>_YYYY_MM.
V_YEAR=$(date --date="${TD_DATE}" +%Y)
V_MONTH=$(date --date="${TD_DATE}" +%m)
V_TABLE_web_chat_ems="web_chat_ems_${V_YEAR}_${V_MONTH}"
V_TABLE_web_chat_text_ems="web_chat_text_ems_${V_YEAR}_${V_MONTH}"

# Import one day of chat sessions into itcast_ods.web_chat_ems.
# \$CONDITIONS is required by sqoop's --query split mechanism (escaped so
# the shell leaves it for sqoop to substitute).
"${SQOOP_HOME}" import \
--connect jdbc:mysql://192.168.52.150:3306/nev \
--username root \
--password 123456 \
--driver com.mysql.jdbc.Driver \
--query "select id,
create_date_time,
session_id,
sid,
create_time,
seo_source,
seo_keywords,
ip,
area,
country,
province,
city,
origin_channel,
user as user_match,
manual_time,
begin_time,
end_time,
last_customer_msg_time_stamp,
last_agent_msg_time_stamp,
reply_msg_count,
msg_count,
browser_name,
os_info,
'${TD_DATE}' as starts_time
from ${V_TABLE_web_chat_ems}
where create_time between '${TD_DATE} 00:00:00' and '${TD_DATE} 23:59:59' and \$CONDITIONS" \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_ems \
--hcatalog-storage-stanza 'stored as orc tblproperties ("orc.compress"="ZLIB")' \
--hive-partition-key starts_time \
--hive-partition-value "${TD_DATE}" \
-m 100 \
--split-by id

# Wait for any background jobs before the second import.
# NOTE(review): the import above runs in the foreground, so this wait is a
# no-op; kept for parity with the original script.
wait

# Import the matching chat text rows into itcast_ods.web_chat_text_ems,
# restricted to the sessions created on the target date.
"${SQOOP_HOME}" import \
--connect jdbc:mysql://192.168.52.150:3306/nev \
--username root \
--password 123456 \
--driver com.mysql.jdbc.Driver \
--query "
select id,
referrer,
from_url,
landing_page_url,
url_title,
platform_description,
other_params,
history,
'${TD_DATE}' as start_time
from ${V_TABLE_web_chat_text_ems} wcte,
(select id as wce_id, create_time
from ${V_TABLE_web_chat_ems}
where create_time between '${TD_DATE} 00:00:00' and '${TD_DATE} 23:59:59') wce
where wcte.id = wce.wce_id
and \$CONDITIONS" \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_text_ems \
--hcatalog-storage-stanza 'stored as orc tblproperties ("orc.compress"="ZLIB")' \
--hive-partition-key start_time \
--hive-partition-value "${TD_DATE}" \
-m 100 \
--split-by id
7)sqoop导出常见命令
# Export the order-total table data back to MySQL.
bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/app_didi \
--username root \
--password 123456 \
--table t_order_total \
--export-dir /user/hive/warehouse/app_didi.db/t_order_total/month=2020-04
# Export the subscribed / non-subscribed order statistics.
bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/app_didi \
--username root \
--password 123456 \
--table t_order_subscribe_total \
--export-dir /user/hive/warehouse/app_didi.db/t_order_subscribe_total/month=2020-04
# Export the consult DWS table via HCatalog.
# The JDBC URL is quoted because it contains "&", which the shell would
# otherwise treat as a background operator.
sqoop export \
--connect "jdbc:mysql://192.168.52.150:3306/scrm_bi?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 123456 \
--driver com.mysql.jdbc.Driver \
--table itcast_consult \
--hcatalog-database itcast_dws \
--hcatalog-table consult_dws \
-m 100
8)安装superset
1. 安装linux依赖的软件
sudo yum install gcc gcc-c++ libffi-devel python-devel python-pip python-wheel openssl-devel cyrus-sasl-devel openldap-devel
2. 切换到一个python 环境
# FIX: command names are case-sensitive on Linux — "Conda" is not found.
conda activate pyspark_env
3. 安装apache-superset
# FIX: command names are case-sensitive on Linux — "Pip" is not found.
pip install apache-superset
4. 配置
4.1. 初始化数据库
superset db upgrade
4.2.初始化其他配置
# Create an admin user in your metadata database (use `admin` as username to be able to load the examples)
# FIX: removed the "$ " shell-prompt artifact that preceded the export —
# pasting it verbatim would fail with "$: command not found".
export FLASK_APP=superset
superset fab create-admin
# Load some data to play with — skip for now
# superset load_examples
# Create default roles and permissions
superset init
# To start a development web server on port 8088, use -p to bind to another port
superset run -h node1 -p 9999 --with-threads --reload --debugger
来自 <https://superset.apache.org/docs/installation/installing-superset-from-scratch>
9)mongodb 常见命令
# Start the mongod server in the background.
/export/servers/mongodb-linux-x86_64-rhel70-3.4.24/bin/mongod &
[root@node03 bin]# mongod --rest
# Stop the mongod server.
/export/servers/mongodb-linux-x86_64-rhel70-3.4.24/bin/mongod --shutdown
# Check which process is listening on mongod's default port (27017).
netstat -ntulp | grep 27017
# Locate the mongod.lock file on disk.
# NOTE(review): the original comment claimed this "views the mongodb
# databases" — it only finds the lock file, it does not inspect any data.
find / -name "mongod.lock"
10)dolphinscheduler
1. 登录到 dolphinscheduler
http://node01:8888/view/login/index.html
用户名密码:admin/admin123
2. 启动前端服务
后台服务:$DOLPHINSCHEDULER_HOME/start-all.sh
前端: systemctl restart nginx
附录:
Nginx 配置文件,修改端口 8088=>8089
vim /etc/nginx/conf.d/dolphinscheduler.conf
ds的日志目录:
/opt/soft/dolphinscheduler/logs/
ds设置环境应用程序的变量
/export/servers/dolphinscheduler/dolphinscheduler-backend-1.2.0/conf/env/.dolphinscheduler_env.sh