# Fetch the Baidu "buzz" ranking page and collect the anchor elements that
# the rest of the script reads titles and links from.
url = 'http://top.baidu.com/buzz?b=1&fr=topindex'
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.108 Safari/537.36'
    ),
    'Referer': 'http://top.baidu.com/',
}
# A timeout keeps the script from blocking forever if the server never
# answers; requests waits indefinitely when none is given.
r = requests.get(url, headers=header, timeout=10)
# Force GBK decoding before r.text is read.
r.encoding = 'gbk'
selector = etree.HTML(r.text)
# First-pass query: the first <a> inside every <td class="keyword"> cell.
# Each matched element is queried again below for its text and href.
eles = selector.xpath('//td[@class="keyword"]/a[1]')
# Step 2: run a second, relative query on each anchor to pull out the
# individual field values we need, one record per anchor.
ls = []
for ele in eles:
    texts = ele.xpath('./text()')
    hrefs = ele.xpath('./@href')
    # An anchor with no text node or no href has nothing usable; skip it
    # rather than failing on the [0] lookup.
    if not texts or not hrefs:
        continue
    # Hot-search topic title.
    title = texts[0]
    # Hot-search topic link. NOTE(review): this rebinds the name `url` that
    # earlier held the page address; kept so later code sees the same names.
    url = hrefs[0]
    crawled_time = datetime.now()
    temp_ls = {
        'title': title,
        'url': url,
        # Stored as text (str() of the datetime), not as a datetime object.
        'crawled_time': str(crawled_time),
    }
    ls.append(temp_ls)
data_email =''for index , email_ls in