import urllib.request
import urllib.parse
import random
# Pool of browser-like request headers; one entry is picked at random so the
# request does not go out with urllib's default User-Agent.
# Each value must be the bare User-Agent string: the header name is supplied by
# the dict key, so repeating "User-Agent:" inside the value would corrupt it.
headers = [
    {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"},
    {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0"},
    {"User-Agent": "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11"},
]
def build_page_url(name, page):
    """Return the Tieba listing URL for page *page* (1-based) of forum *name*.

    The ``pn`` query parameter is an offset that advances by 50 per page, so
    page 1 maps to ``pn=0``, page 2 to ``pn=50``, and so on. Both parameters
    are passed through ``urlencode`` so non-ASCII forum names are escaped and
    every key is joined to its value with ``=``.
    """
    query = urllib.parse.urlencode({"kw": name, "pn": (page - 1) * 50})
    return "https://tieba.baidu.com/f?" + query


def main():
    """Prompt for a forum name and page range, then save each page as HTML.

    Pages from the start page through the end page (inclusive) are fetched
    and written to ``第<page>页.html`` in the current working directory.

    Raises:
        ValueError: if a page number entered by the user is not an integer.
        urllib.error.URLError: if a page cannot be fetched.
    """
    # One User-Agent is chosen per run and reused for every request.
    header = random.choice(headers)
    name = input("请输入贴吧名:")
    start = int(input("请输入开始页:"))
    end = int(input("请输入结束页:"))
    # end + 1 so that the end page the user typed is itself downloaded.
    for i in range(start, end + 1):
        rqt = urllib.request.Request(build_page_url(name, i), headers=header)
        # The context manager closes the HTTP response even if decoding fails.
        with urllib.request.urlopen(rqt) as res:
            html = res.read().decode("utf-8")
        filename = "第" + str(i) + "页.html"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(html)


if __name__ == "__main__":
    main()