# -*- coding: utf-8 -*-
import urllib2, string
def baidu_tieba(url, begin_page, end_page):
    """Download a range of numbered pages and save each one as an HTML file.

    For every page number from ``begin_page`` through ``end_page``
    (inclusive), the document at ``url + str(page_number)`` is fetched and
    written to the current working directory under a file name made of the
    page number zero-padded to five digits plus ``.html`` (``00001.html``).

    url: page address with the trailing page number removed, so that
        appending a number yields the address of that page.
    begin_page: first page number to download.
    end_page: last page number to download (inclusive).

    Errors raised by ``urllib2.urlopen`` or by file I/O are not caught and
    abort the remaining downloads.
    """
    for page_number in range(begin_page, end_page + 1):
        file_name = str(page_number).zfill(5) + '.html'
        # A single parenthesised argument prints the same bytes under the
        # Python 2 print statement used by this script.
        print('正在下载第' + str(page_number) + '个网页,并将其存储为' + file_name + '......')

        # Fetch before creating the file so that a failed download does not
        # leave an empty .html file behind; always release the connection.
        response = urllib2.urlopen(url + str(page_number))
        try:
            page_bytes = response.read()
        finally:
            response.close()

        # Binary mode stores the bytes exactly as received (no newline
        # translation); the with-block closes the file even if write fails.
        with open(file_name, 'wb') as out_file:
            out_file.write(page_bytes)
# Base address of the thread; the user is asked to omit the digits after "pn=".
bdurl = str(raw_input(u'请输入贴吧地址,去掉pn=后面的数字:\n'))
# Page bounds are prompted for in order (first page, then last page) and
# parsed as integers.
begin_page, end_page = (
    int(raw_input(u'请输入开始的页数:\n')),
    int(raw_input(u'请输入终点的页数:\n')),
)
# 功能:下载对应页码内的所有页面并存储为 html 文件。
#iPostEnd = 10
# 参考:http://blog.csdn.net/songyu0120/article/details/43488487
import urllib2, string
def baidu_tieba(url, begin_page, end_page):
    """Download a range of numbered pages and save each one as an HTML file.

    For every page number from ``begin_page`` through ``end_page``
    (inclusive), the document at ``url + str(page_number)`` is fetched and
    written to the current working directory under a file name made of the
    page number zero-padded to five digits plus ``.html`` (``00001.html``).

    url: page address with the trailing page number removed, so that
        appending a number yields the address of that page.
    begin_page: first page number to download.
    end_page: last page number to download (inclusive).

    Errors raised by ``urllib2.urlopen`` or by file I/O are not caught and
    abort the remaining downloads.
    """
    for page_number in range(begin_page, end_page + 1):
        file_name = str(page_number).zfill(5) + '.html'
        # A single parenthesised argument prints the same bytes under the
        # Python 2 print statement used by this script.
        print('正在下载第' + str(page_number) + '个网页,并将其存储为' + file_name + '......')

        # Fetch before creating the file so that a failed download does not
        # leave an empty .html file behind; always release the connection.
        response = urllib2.urlopen(url + str(page_number))
        try:
            page_bytes = response.read()
        finally:
            response.close()

        # Binary mode stores the bytes exactly as received (no newline
        # translation); the with-block closes the file even if write fails.
        with open(file_name, 'wb') as out_file:
            out_file.write(page_bytes)
# Base address of the thread; the user is asked to omit the digits after "pn=".
bdurl = str(raw_input(u'请输入贴吧地址,去掉pn=后面的数字:\n'))
# Page bounds are prompted for in order (first page, then last page) and
# parsed as integers.
begin_page, end_page = (
    int(raw_input(u'请输入开始的页数:\n')),
    int(raw_input(u'请输入终点的页数:\n')),
)
# Download every page in the requested range.
baidu_tieba(bdurl, begin_page, end_page)
# 操作:输入带分页的地址,去掉最后面的数字,设置一下起始页数和终点页数。
#bdurl = 'http://tieba.baidu.com/p/2296017831?pn='
#iPostBegin = 1
# Python 中 input 和 raw_input 在命令行中读入文件地址的区别请参考:
# http://blog.csdn.net/songyu0120/article/details/43488487
2457

被折叠的 条评论
为什么被折叠?



