import requests
from bs4 import BeautifulSoup
# Script: query Baidu for a fixed search phrase and collect the links of the
# result entries. The collected links are stored in the module-level list `b`.

# Baidu search URL; the `wd` parameter carries the search phrase.
url='https://www.baidu.com/s?ie=UTF-8&wd=盗梦空间豆瓣'
headers = {  # Mimic a browser request; a cookie could be added to simulate a logged-in state.
    # The value must be the bare UA string -- it must not repeat the
    # "User-Agent:" header name inside the value.
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
}
b = []  # hrefs of the search-result links, in page order

# Send a GET request with the browser-like headers. A timeout is set so the
# script cannot hang forever on an unresponsive server.
data = requests.get(url, headers=headers, timeout=10)
# Name the parser explicitly: without it bs4 picks whichever parser happens to
# be installed (and emits a warning), so results could differ between machines.
soup = BeautifulSoup(data.text, 'html.parser')
print(soup)  # dump the parsed page (useful for checking what Baidu returned)

# Walk result containers -> <h3 class="t"> titles -> anchors.
# NOTE(review): a multi-class string passed to class_ only matches that exact
# class attribute value; confirm it still matches Baidu's current markup.
for tt in soup.find_all('div', class_="result c-container new-pmd"):
    for h3 in tt.find_all('h3', class_='t'):
        # href=True skips anchors without an href, which would otherwise
        # raise KeyError on a['href'].
        for a in h3.find_all('a', href=True):
            print(a.text,' url:',a['href'])
            b.append(a['href'])
#------------------------------------------------------------------- obtain the movie's Douban URL
# data = requests.get(b[0], headers=headers)
# soup = BeautifulSoup(data.text, 'lxml')
# for div1 in soup.find_all('div',id='comments-section'):
#     for div in div1.find_all('div'):
#         for h2 in div.find_all('h2'):
#             for a in h2.find_all('a'):
#                 print('url:', a['href'])
#--------------------------------------------------------------------- obtain the URL of the Douban reviews
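# python模拟百度搜索豆瓣电影 (Simulating a Baidu search for a Douban movie page with Python)
# 最新推荐文章于 2024-07-20 17:12:48 发布 (latest recommended article published 2024-07-20 17:12:48)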