# Python 3: first web-scraper exercise — downloads ten Baidu Tieba pages and saves each one locally.
import urllib.request
def baidu_tieba(url, begin_page, end_page):
    """Download pages ``begin_page``..``end_page`` (inclusive) and save each to disk.

    Each request URL is formed by appending the page number to *url*; the
    response body is written to a file in the current directory named with
    the zero-padded page number, e.g. ``00001.html``.

    Args:
        url: Base URL; the page number is concatenated directly onto it.
        begin_page: First page number to fetch (inclusive).
        end_page: Last page number to fetch (inclusive).
    """
    for i in range(begin_page, end_page + 1):
        sName = str(i).zfill(5) + '.html'
        print('正在下载第' + str(i) + '个网页,并将其存储为' + sName + '.....')
        # Use a context manager so the HTTP connection is closed
        # deterministically instead of relying on garbage collection.
        with urllib.request.urlopen(url + str(i)) as resp:
            m = resp.read()
        with open(sName, 'wb') as file:
            file.write(m)
# Target thread base URL; the page number is appended by baidu_tieba().
bdurl = 'http://tieba.baidu.com/p/4785143088?pn='
begin_page = 1
end_page = 10

# Guard the network call so importing this module has no side effects;
# the download only runs when the file is executed as a script.
if __name__ == '__main__':
    baidu_tieba(bdurl, begin_page, end_page)