import urllib.request
import urllib.parse
# https://tieba.baidu.com/f?kw=%E8%BF%AA%E4%B8%BD%E7%83%AD%E5%B7%B4&ie=utf-8&pn=350
# &pagelets=frs-list%2Fpagelet%2Fthread&pagelets_stamp=1589720343578
def creat_request(page, kw='迪丽热巴'):
    """Build the GET request for one listing page of a Baidu Tieba forum.

    Args:
        page: Page number. It is converted to the ``pn`` query parameter
            as ``(page - 1) * 50``, so page 1 yields ``pn=0``.
        kw: Forum name sent as the ``kw`` query parameter. The default
            reproduces the keyword that used to be hard-coded in the URL.

    Returns:
        A ``urllib.request.Request`` for the listing URL carrying a
        desktop-browser ``User-Agent`` header.
    """
    # Encode the whole query in one place so a non-ASCII forum name is
    # percent-encoded (UTF-8) instead of being pasted in pre-encoded.
    query = urllib.parse.urlencode({
        'kw': kw,
        'ie': 'utf-8',
        'pn': (page - 1) * 50,
    })
    url = 'https://tieba.baidu.com/f?' + query
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
    }
    return urllib.request.Request(url=url, headers=headers)
def get_content(request, page=None):
    """Fetch *request* and return the response body as text.

    Args:
        request: A URL string or ``urllib.request.Request`` to open.
        page: Unused. Accepted so existing ``get_content(request, page)``
            calls keep working.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even if read/decode raises,
    # so the underlying connection is not leaked.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def download(content, page=None):
    """Write one page of HTML to ``tieba/<page>.html`` encoded as UTF-8.

    Args:
        content: Text to write to the file.
        page: Page number used in the file name. When omitted, the
            module-level variable ``page`` is used, which is where the
            one-argument call ``download(content)`` resolves the name.

    Raises:
        NameError: If ``page`` is not passed and no module-level ``page``
            is defined.
    """
    import os  # local import keeps this function self-contained

    if page is None:
        # Legacy one-argument call: look the page number up in module
        # globals instead of relying on an implicit free variable.
        page = globals().get('page')
        if page is None:
            raise NameError(
                "name 'page' is not defined; call download(content, page)"
            )

    # open() does not create missing directories, so make sure the
    # output folder exists before writing.
    os.makedirs('tieba', exist_ok=True)
    with open('tieba/{}.html'.format(page), 'w', encoding='utf-8') as fp:
        fp.write(content)
if __name__ == '__main__':
    # Ask for the inclusive page range: first page, then last page.
    first_page = int(input('请输入起始页码:'))
    last_page = int(input('请输入结束页码:'))

    # NOTE: the loop variable must stay the module-level name `page`;
    # download() is called without a page argument and resolves `page`
    # from module globals to build the output file name.
    for page in range(first_page, last_page + 1):
        content = get_content(creat_request(page), page)
        download(content)
# 爬虫---爬取百度贴吧---迪丽热巴吧---get请求
# 最新推荐文章于 2024-06-14 09:50:01 发布