-- 首先获取网站内容后,从内容中截取出ICP信息。(After fetching each site's content, extract the ICP information from it.)
-- Initial extraction: seed ent_icp with a 35-character window of web_text that
-- starts two characters before the first 'ICP' occurrence.
-- Only rows where is_yellp is null and ent_icp is still null are touched.
-- greatest(..., 1) keeps the start position >= 1: when the marker is the very
-- first character, instr(...) - 2 evaluates to -1, and Oracle's substr would
-- then count backwards from the END of web_text and return only its last character.
update z_beijing_all_web33
   set ent_icp = substr(web_text, greatest(instr(web_text, 'ICP', 1) - 2, 1), 35)
 where is_yellp is null
   and ent_icp is null
   and web_text like '%ICP%';

-- Same extraction for the lowercase spelling 'icp'; because of the
-- "ent_icp is null" filter it only affects rows the uppercase statement did not fill.
update z_beijing_all_web33
   set ent_icp = substr(web_text, greatest(instr(web_text, 'icp', 1) - 2, 1), 35)
 where is_yellp is null
   and ent_icp is null
   and web_text like '%icp%';
-- ICP trimming, branch for values that contain '号-'.

-- Step 1: keep the text up to and including '号-' plus the one character that
-- follows it; everything after that is discarded.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '号-', 1) + 2)
 where ent_icp is not null
   and ent_icp like '%号-%';

-- Step 2: drop leading text so the value starts one character before 'ICP'
-- (a computed start of 0 is treated as 1 by Oracle's substr).
-- The like '%ICP%' guard is required: for a value without an uppercase 'ICP'
-- (e.g. one seeded from the lowercase 'icp' match) instr returns 0, the start
-- position becomes -1, and substr would keep only the last character.
-- Such rows are now left unchanged by this step.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%号-%'
   and ent_icp like '%ICP%';
-- Step 3: the value contains both 'ICP证' and 'ICP备' (and '号-'):
-- keep only the part starting one character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%ICP证%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 4: same cut for values that contain '许可证' together with 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%许可证%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 5: the value contains '备案' and a ':' (plus '号-' and 'ICP备'):
-- drop everything up to and including the first ':'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, ':', 1) + 1)
 where ent_icp is not null
   and ent_icp like '%备案%'
   and ent_icp like '%:%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 6: identical to step 5; on a second run it strips one more ':'-terminated
-- prefix from values that still match the filter.
-- NOTE(review): confirm this repetition is intentional and not a copy-paste leftover.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, ':', 1) + 1)
 where ent_icp is not null
   and ent_icp like '%备案%'
   and ent_icp like '%:%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';
-- ICP trimming, branch for values that contain '号' but not '号-'.

-- Step 7: cut the value right after the first '号'
-- (values containing '证号' are skipped).
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '号', 1))
 where ent_icp is not null
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp not like '%证号%';

-- Step 8: the value contains '许可证' together with 'ICP备':
-- keep only the part starting one character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%许可证%'
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 9: drop leading text so the value starts one character before 'ICP'
-- (a computed start of 0 is treated as 1 by Oracle's substr).
-- The like '%ICP%' guard is required: without an uppercase 'ICP' in the value
-- instr returns 0, the start position becomes -1, and substr would keep only
-- the last character. Such rows are now left unchanged by this step.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%ICP%';

-- Step 10: the value still contains '证' together with 'ICP备':
-- keep only the part starting one character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%证%'
   and ent_icp like '%ICP备%';
-- Steps 11-16: each statement keeps only the part of ent_icp that precedes the
-- first occurrence of its marker text. When the marker is the first character
-- the requested length is 0, which Oracle's substr turns into NULL.

-- Step 11: cut at '京公网安备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '京公网安备', 1) - 1)
 where ent_icp like '%京公网安备%';

-- Step 12: cut at '固定电话'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '固定电话', 1) - 1)
 where ent_icp like '%固定电话%';

-- Step 13: cut at '电话'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '电话', 1) - 1)
 where ent_icp like '%电话%';

-- Step 14: cut at '咨询热线'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '咨询热线', 1) - 1)
 where ent_icp like '%咨询热线%';

-- Step 15: cut at '版权所有'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '版权所有', 1) - 1)
 where ent_icp like '%版权所有%';

-- Step 16: cut at '地址'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '地址', 1) - 1)
 where ent_icp like '%地址%';

-- Final cleanup: remove every '?' character from ent_icp.
-- NOTE(review): presumably these are placeholders for characters that failed
-- encoding conversion — confirm against the data.
update z_beijing_all_web33
   set ent_icp = replace(ent_icp, '?', '')
 where ent_icp like '%?%';
-- Second pass: the statements from here to the end of the script repeat the
-- same extraction and trimming sequence that appears earlier in this file.
-- NOTE(review): confirm the second pass is intentional and not a copy-paste duplicate.

-- Initial extraction: seed ent_icp with a 35-character window of web_text that
-- starts two characters before the first 'ICP' occurrence.
-- Only rows where is_yellp is null and ent_icp is still null are touched.
-- greatest(..., 1) keeps the start position >= 1: when the marker is the very
-- first character, instr(...) - 2 evaluates to -1, and Oracle's substr would
-- then count backwards from the END of web_text and return only its last character.
update z_beijing_all_web33
   set ent_icp = substr(web_text, greatest(instr(web_text, 'ICP', 1) - 2, 1), 35)
 where is_yellp is null
   and ent_icp is null
   and web_text like '%ICP%';

-- Same extraction for the lowercase spelling 'icp'; because of the
-- "ent_icp is null" filter it only affects rows the uppercase statement did not fill.
update z_beijing_all_web33
   set ent_icp = substr(web_text, greatest(instr(web_text, 'icp', 1) - 2, 1), 35)
 where is_yellp is null
   and ent_icp is null
   and web_text like '%icp%';
-- ICP trimming (second pass), branch for values that contain '号-'.

-- Step 1: keep the text up to and including '号-' plus the one character that
-- follows it; everything after that is discarded.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '号-', 1) + 2)
 where ent_icp is not null
   and ent_icp like '%号-%';

-- Step 2: drop leading text so the value starts one character before 'ICP'
-- (a computed start of 0 is treated as 1 by Oracle's substr).
-- The like '%ICP%' guard is required: for a value without an uppercase 'ICP'
-- (e.g. one seeded from the lowercase 'icp' match) instr returns 0, the start
-- position becomes -1, and substr would keep only the last character.
-- Such rows are now left unchanged by this step.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%号-%'
   and ent_icp like '%ICP%';
-- Step 3 (second pass): the value contains both 'ICP证' and 'ICP备' (and '号-'):
-- keep only the part starting one character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%ICP证%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 4: same cut for values that contain '许可证' together with 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%许可证%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 5: the value contains '备案' and a ':' (plus '号-' and 'ICP备'):
-- drop everything up to and including the first ':'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, ':', 1) + 1)
 where ent_icp is not null
   and ent_icp like '%备案%'
   and ent_icp like '%:%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 6: identical to step 5; on a second run it strips one more ':'-terminated
-- prefix from values that still match the filter.
-- NOTE(review): confirm this repetition is intentional and not a copy-paste leftover.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, ':', 1) + 1)
 where ent_icp is not null
   and ent_icp like '%备案%'
   and ent_icp like '%:%'
   and ent_icp like '%号-%'
   and ent_icp like '%ICP备%';
-- ICP trimming (second pass), branch for values that contain '号' but not '号-'.

-- Step 7: cut the value right after the first '号'
-- (values containing '证号' are skipped).
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '号', 1))
 where ent_icp is not null
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp not like '%证号%';

-- Step 8: the value contains '许可证' together with 'ICP备':
-- keep only the part starting one character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%许可证%'
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%ICP备%';

-- Step 9: drop leading text so the value starts one character before 'ICP'
-- (a computed start of 0 is treated as 1 by Oracle's substr).
-- The like '%ICP%' guard is required: without an uppercase 'ICP' in the value
-- instr returns 0, the start position becomes -1, and substr would keep only
-- the last character. Such rows are now left unchanged by this step.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%ICP%';

-- Step 10: the value still contains '证' together with 'ICP备':
-- keep only the part starting one character before 'ICP备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, instr(ent_icp, 'ICP备', 1) - 1)
 where ent_icp is not null
   and ent_icp like '%号%'
   and ent_icp not like '%号-%'
   and ent_icp like '%证%'
   and ent_icp like '%ICP备%';
-- Steps 11-16 (second pass): each statement keeps only the part of ent_icp that
-- precedes the first occurrence of its marker text. When the marker is the first
-- character the requested length is 0, which Oracle's substr turns into NULL.

-- Step 11: cut at '京公网安备'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '京公网安备', 1) - 1)
 where ent_icp like '%京公网安备%';

-- Step 12: cut at '固定电话'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '固定电话', 1) - 1)
 where ent_icp like '%固定电话%';

-- Step 13: cut at '电话'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '电话', 1) - 1)
 where ent_icp like '%电话%';

-- Step 14: cut at '咨询热线'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '咨询热线', 1) - 1)
 where ent_icp like '%咨询热线%';

-- Step 15: cut at '版权所有'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '版权所有', 1) - 1)
 where ent_icp like '%版权所有%';

-- Step 16: cut at '地址'.
update z_beijing_all_web33
   set ent_icp = substr(ent_icp, 1, instr(ent_icp, '地址', 1) - 1)
 where ent_icp like '%地址%';

-- Final cleanup: remove every '?' character from ent_icp.
-- NOTE(review): presumably these are placeholders for characters that failed
-- encoding conversion — confirm against the data.
update z_beijing_all_web33
   set ent_icp = replace(ent_icp, '?', '')
 where ent_icp like '%?%';