抓取的目标网站是桌面壁纸网站(desk.zol.com.cn)的汽车主题栏目:
下面的两个print在调试时打开
#print tag
#print attrs
#!/usr/bin/env python
import re
import urllib2
import HTMLParser
# Site root; the parser prepends it to the href values it extracts from pages.
base = "http://desk.zol.com.cn"
# Directory prefix that download() prepends to each image file name.
path = '/home/mk/cars/'
# URL of the most recent album link found on the index page; the parser
# compares each "next" link against it before following that link.
star = ''
def get_url(html):
    """Fetch a page and run it through a detail-page parser.

    Args:
        html: URL of the page to fetch (despite the name, this is a URL,
            not HTML text).

    The page body is fed to ``parse(False)``, whose start-tag handler
    performs the downloads and follows further links as a side effect.
    """
    parser = parse(False)
    response = urllib2.urlopen(urllib2.Request(html))
    try:
        resp = response.read()
    finally:
        # Close explicitly so the connection is released even if read() fails.
        response.close()
    parser.feed(resp)
def download(url):
    """Download the image at ``url`` into the ``path`` directory.

    The local file name is the first match of the ``.jpg`` name pattern
    in the URL (optional digits followed by ``.jpg``). URLs that contain
    no such name are reported and skipped instead of raising.

    Args:
        url: Absolute URL of the image to fetch.
    """
    # Resolve the file name before fetching so an unusable URL costs no
    # network round trip.
    match = re.search(r'[0-9]*\.jpg', url)
    if match is None:
        print('skipping, no .jpg name in url: %s' % url)
        return
    name = match.group()
    print('downloading: %s' % name)

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    # Image data is binary: write it in 'wb' mode so no newline
    # translation can corrupt it, and let the with-block close the file.
    with open(path + name, 'wb') as f:
        f.write(content)
class parse(HTMLParser.HTMLParser):
    """HTML parser that walks wallpaper albums and downloads their images.

    The parser works in one of two modes, chosen at construction time:

    * index mode (``Index`` true): every ``<a>`` tag with exactly four
      attributes whose first attribute is ``class="pic"`` is treated as an
      album link; the link is stored in the module-level ``star`` and the
      linked page is fetched with ``get_url``.
    * detail mode (``Index`` false): an ``<img>`` whose first attribute is
      ``id="bigImg"`` is downloaded using the value of its second
      attribute, and an ``<a>`` with exactly four attributes whose second
      attribute is ``class="next"`` is followed unless it points back to
      ``star``.

    Attributes are matched by position, exactly as the page markup lists
    them.
    """

    def __init__(self, Index):
        """Create a parser; ``Index`` selects index mode when true."""
        self.Index = Index
        HTMLParser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Inspect one start tag and act on album, image and next links.

        Args:
            tag: Tag name as passed by HTMLParser.
            attrs: List of ``(name, value)`` pairs in document order.
        """
        # To debug, print `tag` and `attrs` here.
        global star

        if self.Index:
            if tag == 'a' and len(attrs) == 4 and attrs[0] == ('class', 'pic'):
                new = base + attrs[1][1]
                print('found a link: %s' % new)
                # Remember where this album starts so the detail parser
                # can tell when a "next" link points back to it.
                star = new
                get_url(new)
            return

        # Need at least two attributes: [0] identifies the image and [1]
        # carries its URL. Without this guard a bare <img> raised IndexError.
        if tag == 'img' and len(attrs) >= 2 and attrs[0] == ('id', 'bigImg'):
            image_url = attrs[1][1]
            print('found a picture: %s' % image_url)
            download(image_url)

        if tag == 'a' and len(attrs) == 4 and attrs[1] == ('class', 'next'):
            next_url = base + attrs[2][1]
            print('found a link: %s' % next_url)
            # Stop when the "next" link is the album's starting page.
            if star != next_url:
                get_url(next_url)
# Script entry: fetch the car-wallpaper index page and parse it in index
# mode, which triggers the crawl of every album linked from it.
Index_url = 'http://desk.zol.com.cn/qiche/'
index_response = urllib2.urlopen(Index_url)
try:
    con = index_response.read()
finally:
    # Release the connection before the (long-running) crawl starts.
    index_response.close()
Parser_index = parse(True)
Parser_index.feed(con)
这个脚本所做的仅仅是抓取桌面壁纸网站上优美的壁纸……
相关报道:
> Web开发 > ASP.Net > 正文Net读取Excel 返回DataTable2014-01-13 来源:℡メ㏑╭ァ小凯 我要投稿 using System; using Microsoft.SharePoint; using Microsoft.SharePoint.WebControls; using System.Data; using System.IO; using System.Linq; using Sys 更多
1、准备工作 (1)确认安装了Python解释器,版本2.4到3.4均可。 (2)注意PyCharm有两个发布版本:社区版和专业版,详见 Edition Comparison Matrix。 2、初始化 更多