import urllib
from contextlib import closing

try:  # Python 3: urlopen lives in urllib.request
    from urllib.request import urlopen
except ImportError:  # Python 2: urlopen is a top-level urllib function
    from urllib import urlopen

# Index page of the blog whose posts are saved into the current directory.
INDEX_URL = 'http://blog.sina.com.cn/twocold'

# Byte markers used to locate each post link inside the index page.
TITLE_MARKER = b'class="blog_title"'
URL_PREFIX = b'http://'
URL_SUFFIX = b'.html'

# Post URLs look like ".../s/blog_<id>.html"; the file name starts at "blog".
NAME_MARKER = 's/blog'


def iter_post_urls(page):
    """Yield the URL of every post linked from a blog index page.

    For each occurrence of ``class="blog_title"`` the URL is the span from
    the next ``http://`` through the next ``.html`` (inclusive).

    Args:
        page: Raw index page. Bytes are scanned as-is; text is encoded as
            UTF-8 first so both kinds of input are accepted.

    Yields:
        Each post URL as text, in page order.

    Raises:
        UnicodeDecodeError: If an extracted span is not pure ASCII.
    """
    if not isinstance(page, bytes):
        page = page.encode('utf-8')

    pos = 0
    while True:
        title = page.find(TITLE_MARKER, pos)
        # find() returns -1 when absent; offset 0 is a legitimate hit.
        if title < 0:
            return
        start = page.find(URL_PREFIX, title)
        if start < 0:
            return
        end = page.find(URL_SUFFIX, start)
        if end < 0:
            # Unterminated link: stop instead of slicing with -1.
            return
        pos = end + len(URL_SUFFIX)
        yield page[start:pos].decode('ascii')


def filename_for(url):
    """Return the local file name under which the post at *url* is saved.

    The name is whatever follows the ``s/`` of the ``s/blog`` marker. When
    the marker is missing the last path component of the URL is used, and
    directory parts are always dropped so the result is a plain file name.
    """
    marker_at = url.find(NAME_MARKER)
    name = url[marker_at + 2:] if marker_at >= 0 else url
    return name.rsplit('/', 1)[-1]


def fetch(url):
    """Download *url* and return the response body as bytes."""
    # closing() releases the connection even if read() raises; the Python 2
    # response object is not a context manager on its own.
    with closing(urlopen(url)) as response:
        return response.read()


def main():
    """Save every post linked from INDEX_URL, then report completion."""
    for url in iter_post_urls(fetch(INDEX_URL)):
        content = fetch(url)
        # Binary mode stores the downloaded bytes verbatim.
        with open(filename_for(url), 'wb') as out:
            out.write(content)
    print("it is end!")


if __name__ == '__main__':
    main()
# 我的第一个小爬虫
# 最新推荐文章于 2024-07-27 12:20:46 发布