import urllib.request
import urllib.parse
# Base URL of the Maoyan board page. The trailing "?" lets the URL-encoded
# query string be appended to it directly by plain concatenation.
url = "http://maoyan.com/board/4?"
# Request headers carrying a desktop-browser User-Agent string.
# The delimiters must be plain ASCII quotes: typographic (curly) quotes are
# not valid string delimiters in Python and make the file fail to parse.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.100 Safari/537.36"
    )
}
# 1-based page counter; the loop below derives the query offset from it
# as (i - 1) * 10.
i = 1
while 1:
offset = (i-1)*10
parms = {‘offset’:offset}
parms = urllib.parse.urlencode(parms)
urls = url + parms
request = urllib.request.Request(urls,headers =headers)
response = urllib.request.urlopen(request)
html = response.read().decode(“utf-8”)
with open("第%d页.html" % i, 'a', encoding='utf-8') as f:
print("正在写入第%d页" % i)
f.write(html)
print("第%d页写入完成" % i)
# if not response:
# print("爬取已完成,爬虫自动关闭")
# break
num = input("是否继续爬取(y/n)