# coding=utf-8
from bs4 import BeautifulSoup
import requests
def getHtml(url, label, attr, timeout=10):
    """Fetch a page and print one attribute of every matching tag.

    Downloads ``url``, decodes the body as UTF-8, parses it with
    BeautifulSoup's ``html.parser`` and prints the ``attr`` value of each
    ``label`` tag, one value per line. Tags whose attribute is missing or
    empty are skipped.

    Args:
        url: Address of the page to download.
        label: Tag name to search for (e.g. ``'a'``).
        attr: Attribute to read from each matching tag (e.g. ``'href'``).
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (including ``Timeout`` when the server does
            not answer within ``timeout`` seconds).
    """
    response = requests.get(url, timeout=timeout)
    # Force UTF-8 decoding regardless of what the response headers declare.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    for target in soup.find_all(label):
        try:
            value = target.get(attr)
        except Exception:
            # Keep the best-effort skip for a failing lookup, but no longer
            # swallow KeyboardInterrupt/SystemExit as a bare except would.
            value = ''
        if value:
            print(value)
# Demo run: print the href of every <a> tag found on the Baidu home page.
url, label, attr = 'https://baidu.com/', 'a', 'href'
getHtml(url, label, attr)
# Python crawler: get the tags and attributes of any page (including the href attribute of <a> tags)
# Latest recommended article, published 2021-05-13 16:24:20