公司 Shell 脚本代码如下:
# Directory where the Hive query result file is written before being loaded into the DB.
outputDir=/data/jobfile/dsp-etl/data
# Save the query result directly to a file on the local (Linux) filesystem.
# Aggregates one day of ad-request counts per (channel, template_id, platform)
# from ad.ad_request_log and dumps the Hive result set to a local text file
# (outputDir/adflowforecast_<data_date>.txt). The exposure/click/cost/floor
# columns are emitted as 0 placeholders — presumably filled in by a later
# job; TODO confirm with the downstream loader.
# $1: data_date — the log partition value to aggregate.
function calculateflow()
{
local data_date=$1
# -S (silent) so only result rows reach stdout and therefore the output file.
hive -S -e "
set mapreduce.job.queuename=root.etl;
-- Branch 1: BES and TANX. template_id is derived from creative_specs:
--   BES:  first array element with spaces replaced by commas;
--   TANX: the whole array joined with commas.
SELECT t1.channel AS channel
,t1.template_id AS template_id
,t1.platform AS platform
,t1.count_per_date AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM (
SELECT channel AS channel
,CASE WHEN channel='BES' THEN REGEXP_REPLACE(CREATIVE_SPECS[0],' ',',')
WHEN channel='TANX' THEN CONCAT_WS(',',creative_specs)
END AS template_id
-- Map the numeric os code to a readable platform name; the same mapping is
-- repeated verbatim in every branch below.
,CASE WHEN os='0' THEN 'UNKNOWN'
WHEN os='1' THEN 'IOS'
WHEN os='2' THEN 'ANDROID'
WHEN os='3' THEN 'WINDOWS'
WHEN os='4' THEN 'SYMBIAN'
WHEN os='5' THEN 'JAVA'
WHEN os='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE os
END AS platform
,count(1) AS count_per_date
FROM ad.ad_request_log
-- NOTE(review): data_date is substituted unquoted here (numeric comparison)
-- but quoted as a string in the select list above — presumably the partition
-- column is numeric; confirm.
WHERE data_date=${data_date}
AND creative_specs IS NOT NULL
AND (channel='BES' or channel='TANX')
-- NOTE(review): also grouped by adSize, which is not selected, so rows that
-- agree on (channel, template, os) but differ in adSize stay split into
-- separate count rows — confirm this is intended.
GROUP BY channel,creative_specs,adSize,os
) t1
UNION ALL
-- Branch 2: GDT. creative_specs is an array; explode it so that each element
-- becomes its own template_id row, keeping the pre-explode count.
SELECT 'GDT' AS channel
,new_template_id AS template_id
,CASE WHEN t2.platform='0' THEN 'UNKNOWN'
WHEN t2.platform='1' THEN 'IOS'
WHEN t2.platform='2' THEN 'ANDROID'
WHEN t2.platform='3' THEN 'WINDOWS'
WHEN t2.platform='4' THEN 'SYMBIAN'
WHEN t2.platform='5' THEN 'JAVA'
WHEN t2.platform='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE t2.platform
END AS platform
,t2.count_per_date AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM (
SELECT
creative_specs AS template_id
,os AS platform
,count(1) AS count_per_date
FROM ad.ad_request_log
WHERE data_date=${data_date}
AND creative_specs IS NOT NULL
AND channel='GDT'
GROUP BY creative_specs,os
) t2
LATERAL VIEW EXPLODE(t2.template_id) tmp AS new_template_id
UNION ALL
-- Branch 3: YOUKU native requests (natives IS NOT NULL), reported under the
-- shorter channel label YK; creative_specs exploded like the GDT branch.
SELECT 'YK' AS channel
,new_template_id as template_id
,CASE WHEN t3.platform='0' THEN 'UNKNOWN'
WHEN t3.platform='1' THEN 'IOS'
WHEN t3.platform='2' THEN 'ANDROID'
WHEN t3.platform='3' THEN 'WINDOWS'
WHEN t3.platform='4' THEN 'SYMBIAN'
WHEN t3.platform='5' THEN 'JAVA'
WHEN t3.platform='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE t3.platform
END AS platform
,t3.count_per_date AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM (
SELECT
creative_specs AS template_id
,os AS platform
,count(1) AS count_per_date
FROM ad.ad_request_log
WHERE data_date=${data_date}
AND creative_specs IS NOT NULL
AND natives IS NOT NULL
AND channel='YOUKU'
GROUP BY creative_specs,os
) t3
LATERAL VIEW EXPLODE(t3.template_id) tmp AS new_template_id
UNION ALL
-- Branch 4: YOUKU non-native requests (natives IS NULL); here the whole
-- creative_specs array is joined with commas instead of exploded.
SELECT 'YK' AS channel
,CONCAT_WS(',',creative_specs) AS template_id
,CASE WHEN os='0' THEN 'UNKNOWN'
WHEN os='1' THEN 'IOS'
WHEN os='2' THEN 'ANDROID'
WHEN os='3' THEN 'WINDOWS'
WHEN os='4' THEN 'SYMBIAN'
WHEN os='5' THEN 'JAVA'
WHEN os='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE os
END AS platform
,count(1) AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM ad.ad_request_log
WHERE data_date=${data_date}
AND creative_specs IS NOT NULL
AND natives IS NULL
AND channel='YOUKU'
GROUP BY creative_specs,os
UNION ALL
-- Branch 5: WK. Same shape as the GDT branch (aggregate, then explode
-- creative_specs into one row per template).
SELECT 'WK' AS channel
,new_template_id AS template_id
,CASE WHEN t2.platform='0' THEN 'UNKNOWN'
WHEN t2.platform='1' THEN 'IOS'
WHEN t2.platform='2' THEN 'ANDROID'
WHEN t2.platform='3' THEN 'WINDOWS'
WHEN t2.platform='4' THEN 'SYMBIAN'
WHEN t2.platform='5' THEN 'JAVA'
WHEN t2.platform='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE t2.platform
END AS platform
,t2.count_per_date AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM (
SELECT
creative_specs AS template_id
,os AS platform
,count(1) AS count_per_date
FROM ad.ad_request_log
WHERE data_date=${data_date}
AND creative_specs IS NOT NULL
AND channel='WK'
GROUP BY creative_specs,os
) t2
LATERAL VIEW EXPLODE(t2.template_id) tmp AS new_template_id
UNION ALL
-- Branch 6: IQY uses placement_id (a scalar) as the template key, so no
-- explode is needed.
SELECT 'IQY' AS channel
,placement_id as template_id
,CASE WHEN os='0' THEN 'UNKNOWN'
WHEN os='1' THEN 'IOS'
WHEN os='2' THEN 'ANDROID'
WHEN os='3' THEN 'WINDOWS'
WHEN os='4' THEN 'SYMBIAN'
WHEN os='5' THEN 'JAVA'
WHEN os='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE os
END AS platform
,count(1) AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM ad.ad_request_log
WHERE data_date=${data_date}
AND channel = 'IQY'
AND placement_id IS NOT NULL
GROUP BY placement_id,os
UNION ALL
-- Branch 7: TOUTIAO stores ad types as a comma-separated string; split and
-- explode it first, then count per (single_type, os). Note: unlike the
-- explode branches above, counting happens AFTER the explode here.
SELECT 'TOUTIAO' AS channel
,single_type as template_id
,CASE WHEN os='0' THEN 'UNKNOWN'
WHEN os='1' THEN 'IOS'
WHEN os='2' THEN 'ANDROID'
WHEN os='3' THEN 'WINDOWS'
WHEN os='4' THEN 'SYMBIAN'
WHEN os='5' THEN 'JAVA'
WHEN os='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE os
END AS platform
,count(1) AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM (
SELECT single_type
,os
FROM (
SELECT split(toutiao_ad_type,',') as ad_type
,os
FROM ad.ad_request_log
WHERE data_date=${data_date}
AND channel = 'TOUTIAO'
AND toutiao_ad_type IS NOT NULL
) t1
LATERAL VIEW EXPLODE(ad_type) tmp as single_type
) t2
GROUP BY single_type,os
UNION ALL
-- Branch 8: WAX. Feed type code 0 is normalized to 1 — presumably 0 and 1
-- denote the same feed type; TODO confirm the mapping.
SELECT 'WAX' AS channel
,CASE WHEN wax_feed_type = '0' THEN '1'
ELSE wax_feed_type
END AS template_id
,CASE WHEN os='0' THEN 'UNKNOWN'
WHEN os='1' THEN 'IOS'
WHEN os='2' THEN 'ANDROID'
WHEN os='3' THEN 'WINDOWS'
WHEN os='4' THEN 'SYMBIAN'
WHEN os='5' THEN 'JAVA'
WHEN os='UNKNOWN_OS' THEN 'UNKNOWN'
ELSE os
END AS platform
,count(1) AS count_per_date
,0 AS exposure
,0 AS click
,0 AS cost
,0 AS floor
,'${data_date}' AS date
FROM ad.ad_request_log
WHERE data_date=${data_date}
AND channel = 'WAX'
AND wax_feed_type IS NOT NULL
GROUP BY wax_feed_type,os
;" > "${outputDir}/adflowforecast_${data_date}.txt"
}
# Entry point: resolves the target data_date (defaulting to one day ago),
# runs the Hive export, then loads the exported file into the database via
# the middleware Java loader.
# $1: optional data_date; when omitted, computed from the current time.
function main()
{
local data_date=$1
# Quote the expansion so the test is well-defined when $1 is empty or
# contains whitespace (unquoted, an empty value made this `[ -z ]`).
if [ -z "${data_date}" ]; then
    # NOTE(review): %H keeps an hour component in the default date — confirm
    # the data_date partition really is hour-grained; use %Y%m%d otherwise.
    data_date=$(date +%Y%m%d%H -d "-1 day")
fi
calculateflow "${data_date}"
local export_file="${outputDir}/adflowforecast_${data_date}.txt"
# Abort before the DB load if the Hive export failed or produced an empty
# file; previously java was launched unconditionally.
if [ ! -s "${export_file}" ]; then
    echo "Hive export failed or is empty: ${export_file}" >&2
    return 1
fi
# writing to database
java -Dprofiles.active=prod -cp middleware-utils.jar cn.middleware.database.Application -type AdFlowForecast -dataFilePath "${export_file}"
}
main "$1"
Application 类(数据入库程序)代码如下:
/**
 * Command-line loader: parses a result file exported from Hive onto the local
 * filesystem and batch-inserts the parsed rows into the database.
 *
 * Usage: java ... Application -type &lt;jobType&gt; -dataFilePath &lt;file&gt;
 *
 * Created by fitz on 2017/6/9.
 */
@SpringBootApplication
@Log4j
public class Application implements CommandLineRunner {

    @Autowired
    private ServiceFactory factory;

    /**
     * Finds the parser registered for the given command-line job type.
     *
     * @param type job type name from the command line
     * @return the matching parser, or {@code null} when no job type matches
     */
    private Parser getParser(String type) {
        for (JobType jobType : JobType.values()) {
            if (jobType.getType().equals(type)) {
                // Return the first match instead of scanning the whole enum
                // (the original kept looping and returned the LAST match).
                return jobType.getParser();
            }
        }
        return null;
    }

    /**
     * Finds the enum constant matching the given command-line job type.
     *
     * @param type job type name from the command line
     * @return the matching {@code JobType}, or {@code null} when none matches
     */
    private JobType getJobType(String type) {
        for (JobType jobType : JobType.values()) {
            if (jobType.getType().equals(type)) {
                return jobType;
            }
        }
        return null;
    }

    public static void main(String[] args) throws ParseException, IOException {
        SpringApplication.run(Application.class, args);
    }

    @Override
    public void run(String... args) throws Exception {
        // Parse command-line arguments.
        Options options = new Options();
        options.addOption("type", true, "Job Type");
        options.addOption("dataFilePath", true, "Hive Export ResultSet");
        CommandLine cmd = new PosixParser().parse(options, args);
        if (!cmd.hasOption("type") || !cmd.hasOption("dataFilePath")) {
            log.error("命令行参数中未含有type参数或dataFilePath参数!");
            System.exit(-1);
        }
        // Resolve the parser and job type BEFORE touching the file, so a bad
        // -type fails fast without opening any resources.
        Parser parser = getParser(cmd.getOptionValue("type"));
        JobType jobType = getJobType(cmd.getOptionValue("type"));
        if (parser == null) {
            log.error("无法找到对应的解析器,请检查命令行的type参数!");
            System.exit(-1);
        }
        // Reuse the single File instance (the original built it twice).
        File dataFile = new File(cmd.getOptionValue("dataFilePath"));
        if (!dataFile.exists()) {
            log.error("数据文件不存在,文件路径为:" + cmd.getOptionValue("dataFilePath"));
            System.exit(-1);
        }
        Service service = factory.getServiceImpl(jobType);
        LinkedList<Domain> list = new LinkedList<Domain>();
        // try-with-resources: the original never closed the readers, leaking
        // the file handle for the lifetime of the JVM.
        try (BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // One Hive output line becomes one domain object.
                list.add(parser.parse(line));
            }
        }
        // Batch insert all parsed rows into the database.
        service.batchInsert(list);
    }
}
Application 类将 Linux 系统上导出的结果文件逐行解析为领域对象,再批量保存到 MySQL 中。