大家好,本文将围绕“Python 爬虫抓取网页数据并导出到 Excel”展开说明。用 Python 抓取网页数据并写入 txt 文件,是很多人都想弄明白的问题;要搞清楚如何用 Python 爬取网页数据并存入 Excel,需要先了解以下几点。
from bs4 import BeautifulSoup
import urllib.request
# Douban movie Top 250 listing page.
# NOTE(review): presumably the starting URL for the crawl; its use is outside this view.
url = 'https://movie.douban.com/top250'
# Get the URLs of all the "next page" links found on the page
def get_links(url):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'
}
request = urllib.request.Request(url, headers=headers)
response = urllib.request.urlopen(request)
html = response.read()
soup = BeautifulSoup(html, 'lxml')
pages = soup.find('div', class_='paginator'