sql("load data local inpath '/home/cyftest/long_text_2020-04-16-15-57-36.csv' overwrite into table ccic_dmhub_cdp.cyf_ce")
通过替换数据将数据替换成null
var s="""select
case when attr1 = 'NULL' then null else attr1 end attr1,
case when attr2 = 'NULL' then null else attr2 end attr2,
case when attr3 = 'NULL' then null else attr3 end attr3,
case when attr4 = 'NULL' then null else attr4 end attr4,
case when attr5 = 'NULL' then null else attr5 end attr5,
case when channel_account = 'NULL' then null else channel_account end channel_account,
case when channel_type = 'NULL' then null else channel_type end channel_type,
case when device = 'NULL' then null else device end device,
case when employee = 'NULL' then null else employee end employee,
inbound,
case when field_location = 'NULL' then null else field_location end field_location,
case when referrer = 'NULL' then null else referrer end referrer,
case when referrer_name = 'NULL' then null else referrer_name end referrer_name,
case when tag = 'NULL' then null else tag end tag,
case when target_id = 'NULL' then null else target_id end target_id,
case when target_name = 'NULL' then null else target_name end target_name,
case when campaign = 'NULL' then null else campaign end campaign,
case when campaign_name = 'NULL' then null else campaign_name end campaign_name,
case when content_name = 'NULL' then null else content_name end content_name,
case when source = 'NULL' then null else source end source,
case when summary = 'NULL' then null else summary end summary,
case when internal_content_id = 'NULL' then null else internal_content_id end internal_content_id,
case when last_forwarder = 'NULL' then null else last_forwarder end last_forwarder,
score,
last_updated,
ref_score,
case when page_id = 'NULL' then null else page_id end page_id,
case when event_group = 'NULL' then null else event_group end event_group,
case when batch_id = 'NULL' then null else batch_id end batch_id,
from_collector,
case when app_name = 'NULL' then null else app_name end app_name,
case when app_version = 'NULL' then null else app_version end app_version,
quantity1,
quantity2,
amount1,
amount2,
case when identity_value = 'NULL' then null else identity_value end identity_value,
case when identity_type = 'NULL' then null else identity_type end identity_type,
case when identity_value2 = 'NULL' then null else identity_value2 end identity_value2,
case when identity_type2 = 'NULL' then null else identity_type2 end identity_type2,
case when identity_value3 = 'NULL' then null else identity_value3 end identity_value3,
case when identity_type3 = 'NULL' then null else identity_type3 end identity_type3,
is_first_try,
is_first_engage,
duration,
case when utma = 'NULL' then null else utma end utma,
case when utm = 'NULL' then null else utm end utm,
quantity3,
date1,
date2,
case when platform = 'NULL' then null else platform end platform,
case when os = 'NULL' then null else os end os,
case when os_version = 'NULL' then null else os_version end os_version,
case when browser = 'NULL' then null else browser end browser,
case when browser_version = 'NULL' then null else browser_version end browser_version,
case when model = 'NULL' then null else model end model,
case when network = 'NULL' then null else network end network,
case when ip = 'NULL' then null else ip end ip,
case when ip_county = 'NULL' then null else ip_county end ip_county,
case when ip_city = 'NULL' then null else ip_city end ip_city,
case when ip_province = 'NULL' then null else ip_province end ip_province,
case when ip_country = 'NULL' then null else ip_country end ip_country,
case when screen_width = 'NULL' then null else screen_width end screen_width,
case when screen_height = 'NULL' then null else screen_height end screen_height,
case when domain = 'NULL' then null else domain end domain,
case when path = 'NULL' then null else path end path,
case when attr6 = 'NULL' then null else attr6 end attr6,
case when attr7 = 'NULL' then null else attr7 end attr7,
case when attr8 = 'NULL' then null else attr8 end attr8,
case when attr9 = 'NULL' then null else attr9 end attr9,
case when attr10 = 'NULL' then null else attr10 end attr10,
amount3,
date3,
case when page_type = 'NULL' then null else page_type end page_type,
case when session_id = 'NULL' then null else session_id end session_id,
case when url = 'NULL' then null else url end url,
case when attr11 = 'NULL' then null else attr11 end attr11,
case when attr12 = 'NULL' then null else attr12 end attr12,
case when attr13 = 'NULL' then null else attr13 end attr13,
case when attr14 = 'NULL' then null else attr14 end attr14,
case when attr15 = 'NULL' then null else attr15 end attr15,
case when attr16 = 'NULL' then null else attr16 end attr16,
case when attr17 = 'NULL' then null else attr17 end attr17,
case when attr18 = 'NULL' then null else attr18 end attr18,
case when attr19 = 'NULL' then null else attr19 end attr19,
case when attr20 = 'NULL' then null else attr20 end attr20,
case when attr21 = 'NULL' then null else attr21 end attr21,
case when attr22 = 'NULL' then null else attr22 end attr22,
case when attr23 = 'NULL' then null else attr23 end attr23,
case when attr24 = 'NULL' then null else attr24 end attr24,
case when attr25 = 'NULL' then null else attr25 end attr25,
case when attr26 = 'NULL' then null else attr26 end attr26,
case when attr27 = 'NULL' then null else attr27 end attr27,
case when attr28 = 'NULL' then null else attr28 end attr28,
case when attr29 = 'NULL' then null else attr29 end attr29,
case when attr30 = 'NULL' then null else attr30 end attr30,
case when attr31 = 'NULL' then null else attr31 end attr31,
case when attr32 = 'NULL' then null else attr32 end attr32,
case when attr33 = 'NULL' then null else attr33 end attr33,
case when attr34 = 'NULL' then null else attr34 end attr34,
case when attr35 = 'NULL' then null else attr35 end attr35,
case when attr36 = 'NULL' then null else attr36 end attr36,
case when attr37 = 'NULL' then null else attr37 end attr37,
case when attr38 = 'NULL' then null else attr38 end attr38,
case when attr39 = 'NULL' then null else attr39 end attr39,
case when attr40 = 'NULL' then null else attr40 end attr40,
date4,
date5,
date6,
date7,
date8,
date9,
date10,
amount4,
amount5,
amount6,
amount7,
amount8,
amount9,
amount10,
longitude,
latitude,
case when source1 = 'NULL' then null else source1 end source1,
case when source2 = 'NULL' then null else source2 end source2,
case when source3 = 'NULL' then null else source3 end source3,
case when source4 = 'NULL' then null else source4 end source4,
case when source5 = 'NULL' then null else source5 end source5,
case when source6 = 'NULL' then null else source6 end source6,
case when source7 = 'NULL' then null else source7 end source7,
case when source8 = 'NULL' then null else source8 end source8,
flow_id,
flow_version,
step_id
from cyf_ce"""
//创建df
var df=sql(s)
//注册临时表 或者dataframe.registerTempTable("temp_ce")
//创建表
sql("create table customer_event_c stored as TEXTFILE as select * from temp_ce")