# Stream (company_name, corporate_website) rows from table T through
# split_sjku_domain.py, which emits one (company_name, domain) row per
# comma-separated website.
#
# The dot in the pattern is written as [.] so that only a literal "www." is
# removed. A bare '.' is a regex wildcard: 'www.' would also match the "wwwa"
# in "awwwards.com" and corrupt the domain. The character class avoids having
# to backslash-escape through both the shell and HiveQL string layers.
#
# NOTE(review): the script is invoked by bare name, which presumably relies on
# it being executable with an interpreter line -- confirm, or use
# 'python split_sjku_domain.py' instead.
hive -e "
add file split_sjku_domain.py;
select transform(company_name,regexp_replace(corporate_website,'www[.]','') )
using 'split_sjku_domain.py' as
(company_name,domain)
from T
limit 20
"
# ---- File: split_sjku_domain.py (the transform script used by the query above) ----
#coding=utf-8
import math
import sys
import datetime
# Upper bound on websites emitted per company. The earlier index-based loop
# stopped once the index exceeded 5, so six entries (indices 0..5) were kept.
MAX_WEBSITES = 6


def split_domains(line, max_websites=MAX_WEBSITES):
    """Expand one transform input row into one output row per website.

    Args:
        line: One row of the form ``company_name<TAB>corporate_website``,
            optionally terminated by a newline. ``corporate_website`` is
            split on commas.
        max_websites: Maximum number of websites to keep, in input order.

    Returns:
        A list of ``company_name<TAB>website`` strings, without trailing
        newlines.

    Raises:
        ValueError: If the row does not contain exactly two tab-separated
            fields.
    """
    fields = line.strip('\n').split('\t')
    if len(fields) != 2:
        raise ValueError(
            'expected 2 tab-separated fields, got %d' % len(fields))
    company_name, corporate_website = fields
    websites = corporate_website.split(',')[:max_websites]
    return ['\t'.join([company_name, website]) for website in websites]


def main():
    """Read rows from stdin and write the expanded rows to stdout."""
    for line in sys.stdin:
        try:
            rows = split_domains(line)
        except ValueError as err:
            # A single malformed row (e.g. a stray tab inside a value) should
            # not abort the whole job; report it on stderr and keep going.
            sys.stderr.write('skipping row: %s\n' % err)
            continue
        for row in rows:
            # sys.stdout.write rather than the print statement, so the script
            # runs unchanged under both Python 2 and Python 3.
            sys.stdout.write(row + '\n')


if __name__ == '__main__':
    main()