常用命令列表

最新推荐文章于 2023-03-26 00:21:48 发布

小辉_318

最新推荐文章于 2023-03-26 00:21:48 发布

阅读量396

点赞数

文章标签： mysql spark hbase impala

本文链接：https://blog.csdn.net/u014243407/article/details/78904021

版权

常用笔记

@(我的第一个笔记本)[不慕将来, 不负当下, ]

conf

hdfs配置项,块位置信息开启

<property>
    <name>dfs.client.file-block-storage-locations.timeout</name>
    <value>10000</value>
</property>
<property>
    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
    <value>true</value>
</property>

sql

spark

spark sql

create table

drop table httpdata;
create EXTERNAL table httpdata(imsi string) partitioned by(day int) stored as textfile location '/home/etl/export/parquet/mobile/httpData/';
alter table httpdata add partition(day=20170211);
create EXTERNAL table shandong_lac_ci(lac_ci string) stored as textfile location '/xiaohui/shandong_lac_ci';

select count(*) from cellinfo_20171124 where concat(cast(lac as string),",",cast(ci as string)) in (select lac_ci from shandong_lac_ci);

select count(*) from cellinfo_20171124 ci join shandong_lac_ci slc on concat(cast(ci.lac as string),",",cast(ci.ci as string)) =slc.lac_ci;

beeline

spark sql语句执行位置

beeline -u jdbc:hive2://dmp162:10000/default -n root
beeline -u jdbc:hive2://dmp8:10000 -n root --showHeader=false --delimiterForDSV='|' --outputformat=csv2  -e "select * from location_gd_00_result where length(msisdn)=11" >/home/00.csv

spark-submit

提交任务

spark-submit --master spark://dmp8:7077 --class com.everdata.mlib.SparkGBTs PhoneScam-0.0.1-SNAPSHOT.jar /xiaohui/gbdt/app_res_output_7days.log
java -classpath eversec-0.1.jar com.eversec.AreaImport importArea_20171030_00.csv >zdqy2017103000.txt

spark-submit --master spark://evercloud47:7077 --jars /home/GN/hbase-1.2.2/lib/hbase-annotations-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-annotations-1.2.2-tests.jar,/home/GN/hbase-1.2.2/lib/hbase-client-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-common-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-common-1.2.2-tests.jar,/home/GN/hbase-1.2.2/lib/hbase-examples-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-external-blockcache-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-hadoop2-compat-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-hadoop-compat-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-it-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-it-1.2.2-tests.jar,/home/GN/hbase-1.2.2/lib/hbase-prefix-tree-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-procedure-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-protocol-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-resource-bundle-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-rest-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-server-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-server-1.2.2-tests.jar,/home/GN/hbase-1.2.2/lib/hbase-shell-1.2.2.jar,/home/GN/hbase-1.2.2/lib/hbase-thrift-1.2.2.jar,/home/GN/hbase-1.2.2/lib/metrics-core-2.2.0.jar --class com.everdata.hbase.ReadParquetSort /home/GN/hbase-1.2.2/PhoneScam-0.0.1-SNAPSHOT.jar  /location_hour/day=$day/hour=$hour

spark conf

spark-default.conf

spark.serializer                 org.apache.spark.serializer.KryoSerializer
spark.driver.memory              30g
spark.executor.memory        40g
spark.sql.autoBroadcastJoinThreshold    2147483624
spark.scheduler.mode        FAIR
spark.default.parallelism    100
spark.kryoserializer.buffer.max 2047
#spark.executor.extraJavaOptions -XX:+PrintGCTimeStamps -XX:+UseParNewGC -XX:SurvivorRatio=3 -XX:NewRatio=1 -XX:MaxTenuringThreshold=1000 -XX:MaxPermSize=1024m
spark.executor.extraJavaOptions  -XX:+CMSClassUnloadingEnabled -XX:MaxTenuringThreshold=5 -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+UseCMSCompactAtFullCollection -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:PermSize=2048M -XX:MaxPermSize=4096m -XX:+UseCompressedOops
spark.eventLog.enabled  true
spark.eventLog.dir  hdfs://logSave/tmp/historydir
#spark.storage.memoryFraction   0.5
spark.executor.extraClassPath=/home/xiaohui/hbase-1.2.2/lib/mysql-connector-java-5.1.21.jar
spark.driver.extraClassPath=/home/xiaohui/hbase-1.2.2/lib/mysql-connector-java-5.1.21.jar
spark.sql.autoBroadcastJoinThreshold    2147483647
spark.rpc.askTimeout    300
spark.locality.wait 10
spark.driver.maxResultSize  10g
spark.shuffle.blockTransferService  nio

spark-env.sh

export HADOOP_CONF_DIR=/home/dmp/hadoop/etc/hadoop
export SPARK_MASTER_IP=dmp8
export SPARK_WORKER_CORES=10
export SPARK_WORKER_MEMORY=80g
export SPARK_WORKER_INSTANCES=1
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_WEBUI_PORT=8098
export SPARK_WORKER_WEBUI_PORT=8099

impala sql

impala-shell

查询结果并输出到文件，自定义分隔符

impala-shell -q "select r.msisdn,r.day,r.hour,c.lat,c.lon,r.num from(select msisdn,lac,ci,day,hour,count(*) as num from roaming_destination_south_xj where day=20171016 and (hour>='8' and  hour<'12') group by msisdn,lac,ci,day,hour) r join cellinfo c on c.mnc=1 and r.lac=c.lac and r.ci=c.ci;" -B --output_delimiter="|" -o zdqy2017101612.txt
impala-shell -q "select distinct concat(cast(lac as string),',',cast(ci as string)) from statistic_hour_user_cell where day>20171110 and day<20171118;" -B --output_delimiter="|" -o lac_ci.txt

更改表名

alter table oldtablename rename to newtablename;

创建删除表

drop table cellinfo_20171124;
CREATE TABLE cellinfo_20171124(
mcc int,
mnc int,
lac int,
ci int,
lat double,
lon double,
acc int,
date int,
validity int,
addr string,
province string,
city string,
district string,
township string
)row format delimited FIELDS terminated by '\t' stored as textfile location '/cellinfo/20171124';

插入表

insert overwrite table statistic_hour_app_user partition(day=20171123,hour='12') select appid,msisdn,count(msisdn) as pv,sum(totalflow) as flow from base_dmp where day=20171124 and hour='12' group by appid,msisdn

hive sql

hbase

hbase conf

配置：hbase.master.maxclockske

<property>
 <name>hbase.master.maxclockskew</name>
 <value>200000</value>
 <description>Time difference of regionserver from master</description>
</property>

hbase sql

create 'location',{NAME =>'info', COMPRESSION => 'SNAPPY'}, SPLITS => ['01|','02|','03|','04|','05|','06|','07|','08|','09|','10|','11|','12|','13|','14|','15|','16|','17|','18|','19|','20|','21|','22|','23|','24|','25|','26|','27|','28|','29|','30|','31|','32|','33|','34|','35|','36|','37|','38|','39|','40|','41|','42|','43|','44|','45|','46|','47|','48|','49|','50|','51|','52|','53|','54|','55|','56|','57|','58|','59|','60|','61|','62|','63|','64|','65|','66|','67|','68|','69|','70|','71|','72|','73|','74|','75|','76|','77|','78|','79|','80|','81|','82|','83|','84|','85|','86|','87|','88|','89|','90|','91|','92|','93|','94|','95|','96|','97|','98|','99|']  

scan 'location',{COLUMNS=>'info:flow',STARTROW=>'96942102671',ENDROW=>'96942102672'}
96942102671
count 'location',{ENDROW=>'01'}

scan 'location:info', {COLUMNS => ['flow'], LIMIT => 10, STARTROW => '99444008681'}
scan 'location',{LIMIT => 10}
describe 'location'
disable 'location'
drop 'location'

put 'location','11111','flow:age','24'

@(修改hbase表列超时时间,ttl单位是s)
1. disable ‘location’
2. alter “location”,NAME=>’flow’,TTL=>’200’
3. enable ‘location’
4. scan ‘location’,{LIMIT => 5}

mysql

修改密码

update mysql.user set passwd='xxx' where user='root';  
update mysql.user set authentication_string="xxx" where user="root";
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%'  WITH GRANT OPTION;
FLUSH PRIVILEGES;
 grant all privileges on *.* to root@'%' identified by 'xxx';

update mysql.user set password=password("xxx") where user="root";               
update mysql.user set password=password("xxx") where user="hive1";              
update mysql.user set password=password("starl1ghtbreak_S1B_666") where user="hive";                
GRANT ALL PRIVILEGES ON *.TO 'hive'@'%' IDENTIFIED BY 'starl1ghtbreak_S1B_666' WITH GRANT OPTION;
flush privileges;

sql语句

explain select sb.sld,sum(s2.pv),sum(s2.uv) from sld_bc sb join sld_20170927 s2 on s2.sld like concat("%",sb.sld);  
regexp_extract(t.sld,".*(\\.[a-zA-Z0-9]+\\.[a-zA-Z0-9]+)",1)

linux

profile

export JAVA_HOME=/usr/java/jdk1.8.0_101
export PATH=$JAVA_HOME/bin:$PATH 
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

/etc/security/limits.conf

root soft nofile 655350
root hard nofile 655350
root soft nproc unlimited
root hard nproc unlimited

命令

split

split -b 200M test.csv -a -d 3 location_   每200M分隔一个文件
split -l 100  test.csv -a -d 3 location_   每100行分隔一个文件

jstack

jstack -l pid

ls

按时间排序：

ll -ht

按大小排序：

ll -hS

date

date -d "1 days ago" +%Y-%m-%d

kafka

kafka命令

基本操作

创建kafka topic(一般4台机器都为12个分区)

./kafka-topics.sh --zookeeper evercloud44:2181 --partitions 12 --replication-factor 3 --create --topic window-behavior-log

查看topic

./kafka-topics.sh --describe --zookeeper dmp33:2181 --topic cdr

删除topic

./kafka-topics.sh --delete --zookeeper dmp33:2181 --topic cdr

查看topic数据条数

./kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list evercloud44:9092 --time -1 --topic window-behavior-log|awk -F ':' 'BEGIN{sum=0}{sum+=$3;print sum}'

查看topic具体数据

./kafka-console-consumer.sh --zookeeper evercloud44:2181 --topic cdr --from-beginning  -max-messages 10

高级操作

修改topic分区数（只能扩容）

./kafka-topics.sh --alter --zookeeper localhost:42182 --topic myboys1 --partitions 2

修改topic备份数

./kafka-reassign-partitions.sh --zookeeper localhost:42182  --reassignment-json-file  ../partitions-extension-push-token-topic.json --execute

partitions-extension-push-token-topic.json文件配置

{
        "partitions":
                [
                {
                        "topic": "myboys1",
                        "partition": 0,
                        "replicas": [1,2]    //指定副本所在brokerID
                },
                {
                        "topic": "myboys1",
                        "partition": 1,
                        "replicas": [2,3]
                },
                {
                        "topic": "myboys1",
                        "partition": 2,
                        "replicas": [1,3]
                }
                ],
        "version":1
}

ETL

启动命令

/home/dmp/etl/bin/etl.sh -d -c /home/dmp/etl/config/cdr-qh-unicom_car.config –Xmx20g –Xms10g -Xss2m

/home/dmp/etl/bin/etl.sh -d -c /home/dmp/etl/config/cdr-hb-unicom-simplify-car.config –Xmx20g –Xms10g -Xss2m

/home/dmp/etl/bin/etl.sh -d -c /home/dmp/etl/config/hb-unicom-kafka-getMessageForWeb.config –Xmx20g –Xms10g -Xss2m

/home/etl/bin/etl.sh -d -c /home/etl/config/cdr-hubei-unicom_car.config –Xmx20g –Xms10g -Xss2m

插件

{
                "plugin": "elasticsearch",
                "enable": true,
                "config": {
                    "es_config": {
                        "transport_address": [
                            "192.168.30.17:9300 ","192.168.30.18:9300 

","192.168.30.19:9300 

"
                        ],
                        "client_config": {
                            "cluster.name 

": "es-ha",
                            "client.transport.ignore_cluster_name": "false"
                        }
                    },
                    "index_config": {
                        "index": "window_message_log",
                        "type": "message_log",
                        "bulk_mode": "insert",
                        "bulk_size": "10m",
                        "bulk_thread": 6,
                        "bulk_interval": "180s",
                        "expand_index_by_time": true,
                        "expand_interval": "day",
                        "expand_date_fields": [
                            "_date_datetime"
                        ],
                        "id_fields": [],
                        "fields": [
                            "datetime",
                            "msisdn",
                            "confirmContent",
                            "content",
                            "recommend",
                            "order" 
                        ]
                    },
                    "parent_config": {
                        "enable": false,
                        "parent_bulk_mode": "insert",
                        "parent_id_fields": [
                            "msisdn"
                        ],
                        "parent_fields": []
                    }
                },
                "filters": [
                ]
            }

elasticsearch

index

curl -XPUT 'http://192.168.30.17:9200/_template/window_message_log_day_tmp 

' -d'{
    "template": "window_message_log*",
    "settings": {
        "number_of_shards": 12,
        "number_of_replicas": 1
    },
    "mappings": {
        "order_log": {
            "_all": {
                "enabled": false
            },
            "properties": {
                "datetime": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss",
                    "ignore_malformed": true },
                "confirmContent": {
                    "type": "string",
                    "index": "not_analyzed" },
                "msisdn": {
                    "type": "string",
                    "index": "not_analyzed" },
                "content": {
                    "type": "string",
                    "index": "not_analyzed" },
                "recommend": {
                    "type": "string",
                    "index": "not_analyzed" }, 
                "order": {
                    "type": "string",
                    "index": "not_analyzed" }
            }
        }
    }
}'

“`

小辉_318

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
常用命令列表

常用笔记@(我的第一个笔记本)[不慕将来, 不负当下, ]常用笔记confsqlsparkspark sqlcreate tablebeelinespark-submitspark confspark-defaultconfspark-envshimpala sqlimpala-shellhive sqlhbasehbase confhbase sqlmysql修
复制链接

扫一扫