from bs4 import BeautifulSoup
import urllib.request
# Target page on topic.autohome.com.cn (query string selects brandId=25, page=1).
url='https://topic.autohome.com.cn/new/home/sos.jsp?isNonCar=0&nonCar=0&brandId=25&seriesId=0&page=1'
# Fetch the page with the standard-library urllib. In the author's run this call
# failed with "urlopen error [SSL: CERTIFICATE_VERIFY_FAILED]".
open_it = urllib.request.urlopen(url)
运行后报错:urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1108)。这并不是浏览器的问题,而是SSL证书验证问题:Python在本地找不到用于验证服务器证书的根证书(CA证书)。
解决方式是改用requests第三方库。它不是Python3内置的urllib.request库,而是一个基于urllib3的功能强大的第三方库;requests自带certifi提供的根证书,因此通常可以避免上面的证书验证错误。
pip install requests
然后再来,targets_url = bf.find_all(class_='result-list')会报错:find_all返回的是结果列表(ResultSet),不能在它上面继续调用find_all('a')。改成targets_url = bf.find(class_='result-list')(只返回第一个匹配的标签)就可以了。
from bs4 import BeautifulSoup
import requests
# Listing page to scrape (typeId=3).
url = 'https://topic.autohome.com.cn/new/home/list.jsp?typeId=3'
# Send a browser-like User-Agent header along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
# A timeout keeps the script from hanging forever on an unresponsive server.
req = requests.get(url=url, headers=headers, timeout=10)
# Fail early with a clear HTTP error instead of parsing an error page.
req.raise_for_status()
req.encoding = 'utf-8'
html = req.text
bf = BeautifulSoup(html, 'html.parser')
# find() returns a single Tag (or None when nothing matches), so find_all('a')
# can be called on the result; find_all() here would return a list instead.
targets_url = bf.find(class_='result-list')
url_set = set()
if targets_url is not None:
    # href=True skips anchors that have no href attribute (avoids a KeyError).
    for each in targets_url.find_all('a', href=True):
        # Keep only links whose <a> tag carries no class attribute.
        if 'class' not in each.attrs:
            url_set.add(each['href'])
print(url_set)
{'http://topic.autohome.com.cn/new/marketing/2019/12/jetour/',
'https://topic.autohome.com.cn/act/marketing/2019/12/escape/',
'https://topic.autohome.com.cn/act/marketing/2019/12/mustang/',
'https://topic.autohome.com.cn/new/marketing/2019/11/kx3/',
'https://topic.autohome.com.cn/new/marketing/2019/11/tengshi/',
'https://topic.autohome.com.cn/new/marketing/2019/11/wmex5/',
'https://topic.autohome.com.cn/new/marketing/2019/12/compass/',
'https://topic.autohome.com.cn/new/marketing/2019/12/jkdzddg/',
'https://topic.autohome.com.cn/new/marketing/2020/1/corsair/',
'https://topic.autohome.com.cn/new/marketing/2020/3/xingyue/',
'https://topic.autohome.com.cn/new/marketing/2020/4/t77pro/',
'https://topic.autohome.com.cn/new/marketing/2020/4/xiaopeng/'}
关注微信公众号:“数据分析师手记”
数据分析之家联合JEE RAY品牌为粉丝派发福利
添加粉丝福利派发官,领取粉丝福利哦