所选的网站是桌面壁纸网站(desk.zol.com.cn)的汽车主题页面:
下面这两条 print 语句默认被注释掉,调试时可以取消注释打开:
#print tag
#print attrs
#!/usr/bin/env python
import re
import urllib2
import HTMLParser
# Site root; the relative links scraped from pages are appended to it.
base = 'http://desk.zol.com.cn'

# Directory prefix for saved images. download() concatenates the file name
# directly onto it, so it must end with a separator; nothing in this script
# creates the directory.
path = "/home/mk/cars/"

# URL of the first picture page of the gallery most recently entered from the
# index page. parse.handle_starttag stores it and refuses to follow a "next"
# link equal to it (presumably the last page links back to the first).
star = ""
def get_url(html):
    """Fetch a page and run it through a picture-page parser.

    Despite its name, ``html`` is the URL of the page to fetch, not markup.
    The page body is fed to ``parse(False)``; that parser's
    ``handle_starttag`` may call ``download`` and ``get_url`` again, so one
    call can recurse through a chain of pages before it returns.

    Errors raised by ``urllib2`` are not caught here and propagate to the
    caller.
    """
    parser = parse(False)
    request = urllib2.Request(html)
    response = urllib2.urlopen(request)
    try:
        resp = response.read()
    finally:
        # Close before feeding: feed() can recurse into further get_url()
        # calls, and without this every page in the chain would keep its
        # response object open until the recursion unwinds.
        response.close()
    parser.feed(resp)
def download(url):
    """Download the image at ``url`` into the ``path`` directory.

    The file is named after the first ``<digits>.jpg`` fragment found in the
    URL. A URL with no such fragment is skipped with a message rather than
    raising AttributeError on the failed regex match.
    """
    match = re.search(r'[0-9]*\.jpg', url)
    if match is None:
        print('skipping, no .jpg name in url: %s' % url)
        return
    name = match.group()

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    # Single-argument print(...) emits the same text as the original
    # two-item print statement.
    print('downloading: %s' % name)
    filename = path + name
    # Binary mode: the payload is image data, and text mode would translate
    # newline bytes on platforms that distinguish the two.
    with open(filename, 'wb') as f:
        f.write(content)
class parse(HTMLParser.HTMLParser):
    """HTML parser that drives the crawl from inside ``handle_starttag``.

    The ``Index`` flag selects which kind of page is being parsed:

    * ``Index`` true (index page): every ``<a>`` with exactly four
      attributes whose first attribute is ``class="pic"`` is followed via
      ``get_url``; its URL is also remembered in the module-level ``star``.
    * ``Index`` false (picture page): an ``<img>`` whose first attribute is
      ``id="bigImg"`` is downloaded, and an ``<a>`` with exactly four
      attributes whose second attribute is ``class="next"`` is followed
      unless it points back at ``star``.

    Attribute matching is positional: the link/image address is taken from
    the attribute right after the matched one (presumably ``href``/``src``;
    this depends on the site's markup).
    """

    def __init__(self, Index):
        self.Index = Index
        # Explicit base-class call, as in the original code, rather than
        # super().
        HTMLParser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """React to one start tag; ``attrs`` is a list of (name, value) pairs."""
        global star

        if self.Index:
            if tag == 'a' and len(attrs) == 4 and attrs[0] == ('class', 'pic'):
                new = base + attrs[1][1]
                print('found a link: %s' % new)
                # Remember where this gallery starts so the picture-page
                # branch can tell when a "next" link leads back to it.
                star = new
                get_url(new)
            return

        # Length guard: an <img> with fewer than two attributes used to raise
        # IndexError on attrs[0] / attrs[1].
        if tag == 'img' and len(attrs) >= 2 and attrs[0] == ('id', 'bigImg'):
            image_url = attrs[1][1]
            print('found a picture: %s' % image_url)
            download(image_url)

        if tag == 'a' and len(attrs) == 4 and attrs[1] == ('class', 'next'):
            next_url = base + attrs[2][1]
            print('found a link: %s' % next_url)
            if star != next_url:
                get_url(next_url)
# Script entry: fetch the car-theme index page and parse it in index mode.
# Feeding the page is what starts the crawl, because the parser follows the
# links it recognises from inside handle_starttag.
Index_url = 'http://desk.zol.com.cn/qiche/'
index_response = urllib2.urlopen(Index_url)
try:
    con = index_response.read()
finally:
    # Close explicitly: feed() below does not return until the crawl is
    # finished, so the response would otherwise stay open for the whole run.
    index_response.close()
Parser_index = parse(True)
Parser_index.feed(con)
这个脚本的功能很简单,仅仅是抓取桌面壁纸网站上的精美壁纸……