flume监听
[root@hadoop104 conf]# pwd
/opt/module/apache-flume-1.6.0-bin/conf
[root@hadoop104 conf]# vi qc.conf
配置内容如下
name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -f //home/temdata/qianchen.txt
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path =hdfs://192.168.10.104:9000/flume/tailout/%y-%m-%d/%H-%M/
a1.sinks.k1.hdfs.filePrefix = log-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
a1.sinks.k1.hdfs.rollInterval = 3
a1.sinks.k1.hdfs.rollSize = 20
a1.sinks.k1.hdfs.rollCount = 5
a1.sinks.k1.hdfs.batchSize = 1
a1.sinks.k1.hdfs.useLocalTimeStamp = true
#生成的文件类型,默认是Sequencefile,可用DataStream,则为普通文本
a1.sinks.k1.hdfs.fileType = DataStream
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
保存,并运行
[root@hadoop104 conf]# ../bin/flume-ng agent -c conf -f qc.conf -name a1 -Dflume.root.logger=DEBUG,console
进入hive
1.创建数据库
hive> create database if not exists qianchen;
hive> use qianchen;
2.创建表
hive> create table t_qianchen(id string,position string,salary string,company string,place string,job_require string,eduction string,experience string)row format delimited fields terminated by ',' stored as textfile ;
hive> load data local inpath "/home/temdata/qianchen.txt" into table t_qianchen;
hive> select * from t_qianchen;
重命名数据名:
hive> select positiion as position from t_qianchen ;
新建一个表t_qianchen1,拆分数据:
hive> create table t_qianchen1 as select position,salary from t_qianchen;
去除字段里面的双引号:
hive> create table t_qianchen2 as select regexp_replace(position, '"','')as position,regexp_replace(salary, '"','')as salary from t_qianchen1;
去掉中括号的命令:
hive> create table t_qianchen12 as select regexp_replace(salary, '\\[|\\]','')as salary from t_qianchen1;
查字段名:
hive> desc 表名;
修改表名:
hive> alter table t_qianchen2 rename to t_qianchen1;
去重:
hive> create table t_qianchen2 as select position ,salary from t_qianchen1 group by position,salary;
一、 最低最高平均工资柱状图:
1.数据分析:
hive> create table t_qianchen3 as select position,salary from t_qianchen2 where position like '%数据分析%';
取数据分析工资字段shujufenxi_salary
create table shujufenxi_salary as select regexp_replace(salary, '\\[|\\]','')as salary from t_qianchen3;
(1)取出工资单位为年的shujufenxi_salary_year
create table shujufenxi_salary_year as select * from shujufenxi_salary where salary like '%年';
去除/年shujufenxi_salary_year1
hive> create table shujufenxi_salary_year1 as select regexp_replace(salary, '/年', '')as pay from shujufenxi_salary_year;
转换得到万的工资shujufenxi_salary_year1_1
hive> create table shujufenxi_salary_year1_1 as select regexp_replace(pay, '万','')as pay from shujufenxi_salary_year1 where pay like '%万';
切分shujufenxi_salary_year1_1_1
hive> create table shujufenxi_salary_year1_1_1 as select split(pay,'-')[0] as min ,split(pay,'-')[1] as max from shujufenxi_salary_year1_1;
分别的到最大最小SJFX1
hive> create table SJFX1 as select regexp_replace(max, 'salary:','')as max,regexp_replace(min, 'salary:','')as min from shujufenxi_salary_year1_1_1;
将万写成10000,求出每个月的工资,并求平均值SJFX
create table SJFX as select min*10000/12 as min,max*10000/12 as max,(min+max)/2*10000/12 as avg from SJFX1;
(2)取出工资单位为月的shujufenxi_salary_month
create table shujufenxi_salary_month as select * from shujufenxi_salary where salary like '%月';
去除/月shujufenxi_salary_month1
hive> create table shujufenxi_salary_month1 as select regexp_replace(salary, '/月', '')as pay from shujufenxi_salary_month;
2.1)转换得到万的工资shujufenxi_salary_month1_1
hive> create table shujufenxi_salary_month1_1 as select regexp_replace(pay, '万','')as pay from shujufenxi_salary_month1 where pay like '%万';
切分shujufenxi_salary_month1_1_1
create table shujufenxi_salary_month1_1_1 as select split(pay,'-')[0] as min ,split(pay,'-')[1] as max from shujufenxi_salary_month1_1;
分别的到最大最小SJFX2
hive> create table SJFX2 as select regexp_replace(max, 'salary:','')as max,regexp_replace(min, 'salary:','')as min from shujufenxi_salary_month1_1_1;
将万写成10000,并求平均值SJFX21
hive> create table SJFX21 as select min*10000 as min,max*10000 as max,(min+max)/2*10000 as avg from SJFX2 ;
将SJFX21插入数据到SJFX
hive> insert into SJFX select * from SJFX21;
2.2)转换得到千的工资shujufenxi_salary_month1_2
hive> create table shujufenxi_salary_month1_2 as select regexp_replace(pay, '千','')as pay from shujufenxi_salary_month1 where pay like '%千';
切分shujufenxi_salary_month1_2_1
hive> create table shujufenxi_salary_month1_2_1 as select split(pay,'-')[0] as min ,split(pay,'-')[1] as max from shujufenxi_salary_month1_2;
分别的到最大最小SJFX3
hive> create table SJFX3 as select regexp_replace(max, 'salary:','')as max,regexp_replace(min, 'salary:','')as min from shujufenxi_salary_month1_2_1;
将千写成1000,并求平均值SJFX31
create table SJFX31 as select min*1000 as min,max*1000 as max,(min+max)/2*1000 as avg from SJFX3 ;
将SJFX31插入数据到SJFX
hive> insert into SJFX select * from SJFX31;
(3)终:三表合一SJFX,求最大最小平均SJFX_all
hive> create table SJFX_all as select min(min) as min,max(m