#coding=utf-8
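# Simple Sina blog crawler (Python 2): walks pages 1-4 of the article list for
# blog 1227636382, extracts up to 40 article links per page, and saves each
# article's HTML into the local yishu/ directory.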
from urllib import urlopen  # Python 2 urllib; on Python 3 this is urllib.request.urlopen
import os
import time

url = [''] * 350  # holds the article URLs extracted from the current page
page = 1
link = 1  # running count of links found across all pages

# make sure the output directory exists before writing downloaded pages into it
if not os.path.isdir('yishu'):
    os.makedirs('yishu')
while page <= 4:
    # fetch one page of the blog's article list
    arti = urlopen('http://blog.sina.com.cn/s/articlelist_1227636382_0_' + str(page) + '.html').read()
    i = 0
    # each article link appears as <a title=... href="...html"; locate the first one
    title = arti.find(r'<a title=')
    href = arti.find(r'href=', title)
    html = arti.find(r'.html', href)
    while title != -1 and href != -1 and html != -1 and i < 40:
        url[i] = arti[href + 6:html + 5]  # skip 'href="', keep the trailing '.html'
        print link, ' ', url[i]
        # advance the search past this link so the next iteration finds the following one
        title = arti.find(r'<a title=', html)
        href = arti.find(r'href=', title)
        html = arti.find(r'.html', href)
        # download the article and save it, using the last 26 characters of the URL as the file name
        content = urlopen(url[i]).read()
        filename = url[i][-26:]
        print ' ', filename
        open(r'yishu/' + filename, 'w+').write(content)
        print 'downloading', url[i]
        i = i + 1
        link = link + 1  # how many link addresses have been found so far
        time.sleep(1)  # pause one second between requests to be polite to the server
    else:
        # while/else: runs once no more links are found on this page
        print page, 'find end'
        page = page + 1
else:
    print 'all find'