CREATE TABLE IF NOT EXISTS user(id int, name string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
CREATE TABLE IF NOT EXISTS job(id int, position string, user_id int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
CREATE TABLE IF NOT EXISTS user_job(new_id int, new_name string,new_job_id int, new_position string, new_user_id int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
INSERT OVERWRITE TABLE user_job
SELECT TRANSFORM(user.id,user.name,job.id, job.position, job.user_id) USING 'python weekday_mapper.py'
AS (new_id, new_name,new_job_id, new_position, new_user_id) FROM user join job on user.id = job.user_id
weekday_mapper.py 代码
#!/bin/env python
import sys
import datetime
for line in sys.stdin:
line = line.strip()
new_id, new_name_suiyi, new_job_id, new_position, new_user_id = line.split('\t') #输入源字段,我理解是输入源的占位符
#这里可以根据输入源字段赋值,组成新的值,
new_name = new_name_suiyi+'李逵'
print '\t'.join([new_id, new_name, new_job_id, new_position, new_user_id]) #输出的新值必须和新的表字段对应
CREATE TABLE IF NOT EXISTS job(id int, position string, user_id int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
CREATE TABLE IF NOT EXISTS user_job(new_id int, new_name string,new_job_id int, new_position string, new_user_id int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
INSERT OVERWRITE TABLE user_job
SELECT TRANSFORM(user.id,user.name,job.id, job.position, job.user_id) USING 'python weekday_mapper.py'
AS (new_id, new_name,new_job_id, new_position, new_user_id) FROM user join job on user.id = job.user_id
weekday_mapper.py 代码
#!/bin/env python
import sys
import datetime
for line in sys.stdin:
line = line.strip()
new_id, new_name_suiyi, new_job_id, new_position, new_user_id = line.split('\t') #输入源字段,我理解是输入源的占位符
#这里可以根据输入源字段赋值,组成新的值,
new_name = new_name_suiyi+'李逵'
print '\t'.join([new_id, new_name, new_job_id, new_position, new_user_id]) #输出的新值必须和新的表字段对应