import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def gethtml(url, js=False):
    """Fetch ``url`` and return the parsed page.

    Args:
        url: Address of the page to fetch.
        js: ``False`` (default) delegates to ``gethtml1``; ``True`` delegates
            to ``gethtml2``.

    Returns:
        Whatever the selected helper returns, or ``None`` (after printing an
        error message) when ``js`` compares equal to neither ``False`` nor
        ``True``.
    """
    # Equality rather than identity is kept on purpose so that 0 / 1 keep
    # selecting the same branches they always did.
    if js == False:  # noqa: E712
        return gethtml1(url)
    if js == True:  # noqa: E712
        return gethtml2(url)
    # The message ("invalid js argument") used to be an unquoted bare name,
    # so this branch raised NameError instead of reporting the problem.
    print('js参数错误')
    return None
def gethtml1(url, timeout=10):
    """Download ``url`` with ``requests`` and parse it with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the caller forever.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` for
            4xx/5xx responses.
        requests.RequestException: For connection failures or timeouts.
    """
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    # When the server does not declare a charset, requests falls back to
    # ISO-8859-1 for text/* responses, which garbles non-Latin pages.
    # Use the encoding detected from the body in that case.
    if 'charset' not in res.headers.get('Content-Type', '').lower():
        res.encoding = res.apparent_encoding or res.encoding
    return BeautifulSoup(res.text, 'html.parser')
def gethtml2(url):
    """Load ``url`` in a Selenium-driven Chrome and parse the page source.

    Args:
        url: Address of the page to open.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend
        from the browser's ``page_source``.
    """
    browser = webdriver.Chrome()  # start the locally installed Chrome
    try:
        browser.get(url)  # request the page (opens a browser window)
        html = browser.page_source  # page markup as the browser sees it
    finally:
        # Always shut the browser down, even if loading the page failed,
        # so no Chrome/driver process is left behind.
        browser.quit()
    return BeautifulSoup(html, 'html.parser')
if __name__ == '__main__':
    # Demo: fetch the same page with the default mode, with js explicitly
    # disabled, and finally with js enabled, printing each result.
    url = 'https://www.baidu.com'
    for options in ({}, {'js': False}, {'js': True}):
        print(gethtml(url, **options))
# Fetch a web page's HTML text (using selenium + headless Chrome for content loaded asynchronously by JS) and return a BeautifulSoup soup object.
# Latest recommended article published on 2023-12-06 18:33:17