代码:
# -*- coding: utf-8 -*-
# @Time : 2020/4/14 20:40
# @Author : Oneqq
# @File : 03-gethtml.py
# @Software: PyCharm
from urllib.request import Request, urlopen
from urllib.parse import urlencode
from fake_useragent import UserAgent
def get_html(url):
    """Download *url* and return the raw response body.

    The request carries a Chrome ``User-Agent`` string obtained from
    ``fake_useragent``. The body is read exactly once: the response is a
    forward-only stream, so a second ``read()`` would yield ``b""`` and the
    caller would end up saving an empty file.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The response body as ``bytes``.

    Raises:
        Any exception raised by ``urlopen`` (for example
        ``urllib.error.URLError``) propagates to the caller.
    """
    headers = {
        "User-Agent": UserAgent().chrome,
    }
    request = Request(url, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urlopen(request) as response:
        html_bytes = response.read()
    # Echo the page text as before; undecodable bytes are replaced so that a
    # page in another encoding does not abort the download.
    print(html_bytes.decode(errors="replace"))
    return html_bytes
def save_html(filename, html_bytes):
    """Write ``html_bytes`` to ``filename`` in binary mode.

    An existing file at ``filename`` is overwritten.
    """
    with open(filename, mode="wb") as out_file:
        out_file.write(html_bytes)
def main():
    """Prompt for a search keyword and a page count, then download each page.

    For the N-th page (1-based) the query uses ``kw=<keyword>`` and
    ``pn=(N-1)*50``; the fetched bytes are saved in the current working
    directory under the name "第N页.html".
    """
    keyword = input("请输入搜索的内容:")
    page_count = int(input("请输入下载多少页:"))
    url_template = "https://tieba.baidu.com/f?ie=utf-8&fr=search&{}"
    for page_no in range(1, page_count + 1):
        # The "pn" value advances by 50 for each successive page.
        query = urlencode({"kw": keyword, "pn": (page_no - 1) * 50})
        filename = "第{}页.html".format(page_no)
        print("正在下载" + filename)
        save_html(filename, get_html(url_template.format(query)))
# Run the interactive downloader only when executed as a script.
if __name__ == "__main__":
    main()
结果:

5173

被折叠的 条评论
为什么被折叠?



