pip install beautifulsoup4
访问喜马拉雅专辑页,提取专辑对应的分类类型。
def get_album_type(id):
    """Fetch an album page and return the category shown in its breadcrumb.

    Requests the album page for ``id``, parses the returned HTML and reads
    the breadcrumb links matched by the ``.bread-crumb-link.xV_`` selector.
    The second link's text is used as the category and the third as the
    album title; both are printed together with ``id``.

    Args:
        id: Album identifier substituted into the album URL. The name
            shadows the builtin ``id`` but is kept so existing keyword
            callers continue to work.

    Returns:
        The text of the second breadcrumb link, or ``None`` when the
        response status is not 200 or fewer than three breadcrumb links
        are found on the page.
    """
    album_url = 'https://www.******.com/album/{}'.format(id)
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(album_url, headers=HEADERS, timeout=10)
    if response.status_code != http.HTTPStatus.OK:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): `xV_` looks like a generated class name and may change
    # between site builds -- confirm the selector is still valid.
    breadcrumb_links = soup.select('.bread-crumb-link.xV_')
    # Positions 1 and 2 are read below; bail out instead of raising
    # IndexError when the page does not contain the expected breadcrumb.
    if len(breadcrumb_links) < 3:
        return None

    category_text = breadcrumb_links[1].get_text()
    album_title = breadcrumb_links[2].get_text()
    print(id, category_text, album_title)
    return category_text
soup = BeautifulSoup(response.text, 'html.parser')
将HTML文档字符串传递给BeautifulSoup解析器。
select方法使用CSS选择器 .bread-crumb-link.xV_,选择同时带有类名bread-crumb-link和xV_的所有元素。
breadcrumb_links = soup.select('.bread-crumb-link.xV_')
从选择的元素中取第二个(索引为1)面包屑链接,提取其中的文字信息,例如“娱乐”。
entertainment_text = breadcrumb_links[1].get_text()