视频地址:
http://edu.51cto.com/lesson/id-12393.html
下载博客文章实例
源码:
import os
import time
import urllib
# Download the articles linked from a blog article-list page.
# Runs on Python 2 (the original target of this script) and on Python 3.

LIST_URL = 'http://blog.sina.com.cn/s/articlelist_3973495073_0_1.html'
MAX_ARTICLES = 50      # upper bound on the number of links taken from the page
OUTPUT_DIR = 'hanhan'  # directory the downloaded pages are written to
PAUSE_SECONDS = 1      # delay after each download


def extract_article_urls(page, limit=MAX_ARTICLES):
    """Return up to *limit* article URLs found in the text *page*.

    The scan repeatedly looks for ``<a title=``, then the next ``href=``
    after it, then the next ``.html`` after that.  The URL is the text that
    starts six characters past the beginning of ``href=`` and ends with
    ``.html``.  Scanning stops as soon as any of the three markers is
    missing or *limit* URLs have been collected.

    An empty list is returned when nothing matches.
    """
    urls = []
    cursor = 0
    while len(urls) < limit:
        title_at = page.find('<a title=', cursor)
        if title_at == -1:
            break
        href_at = page.find('href=', title_at)
        if href_at == -1:
            break
        html_at = page.find('.html', href_at)
        if html_at == -1:
            break
        # +6 skips 'href=' and one more character (assumed to be the opening
        # quote of the attribute value); +5 keeps the '.html' suffix.
        urls.append(page[href_at + 6:html_at + 5])
        # Resume from the end of this link so the next anchor is found.
        cursor = html_at
    return urls


def _fetch(address):
    """Return the raw response body of *address* and close the connection."""
    try:
        opener = urllib.urlopen  # Python 2 location
    except AttributeError:
        from urllib.request import urlopen as opener  # Python 3 location
    response = opener(address)
    try:
        return response.read()
    finally:
        response.close()


def main():
    """Print the article URLs on the list page and save each article.

    Every article is written, byte for byte, to OUTPUT_DIR under a file
    name made of the last 26 characters of its URL.
    """
    listing = _fetch(LIST_URL)
    if not isinstance(listing, str):
        # Python 3 returns bytes.  latin-1 maps every byte to one character,
        # so the ASCII markers can be searched whatever the page encoding is.
        listing = listing.decode('latin-1')

    urls = extract_article_urls(listing, MAX_ARTICLES)
    for address in urls:
        print(address)
    print('find end!')

    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    # Only the URLs actually found are requested; an empty URL is never opened.
    for address in urls:
        content = _fetch(address)
        # Presumably the last 26 characters are the 'blog_<id>.html' tail of
        # the article URL -- TODO confirm against real list-page links.
        target = os.path.join(OUTPUT_DIR, address[-26:])
        # Binary mode keeps the downloaded bytes unchanged on every platform.
        with open(target, 'wb') as out:
            out.write(content)
        print('downloading %s' % address)
        time.sleep(PAUSE_SECONDS)
    print('download articles finished!')


if __name__ == '__main__':
    main()