用 BeautifulSoup 处理 HTML 文件
#!/usr/bin/env python
# coding=utf-8
import urllib2
import urllib
from bs4 import BeautifulSoup
import re
def getContent(url, save_dir=r'H:\python_learn\photo'):
    """Download every image listed on one gallery page.

    Fetches ``url``, parses the response with BeautifulSoup, and for each
    ``<img>`` found inside an ``<li class="span3">`` element saves the file
    referenced by its ``data-src`` attribute as ``<save_dir>/<data-id>.jpg``.

    The matched ``<li>`` elements are also stored in the module-level
    global ``siteUrls``.

    Args:
        url: Address of the page to scrape.
        save_dir: Directory the images are written to.  This function does
            not create it.  Defaults to the previously hard-coded path.
    """
    global siteUrls

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    # NOTE(review): no parser is named here, so bs4 chooses among the
    # parsers installed on the machine; results may differ between hosts.
    soup = BeautifulSoup(content)
    siteUrls = soup.find_all('li', attrs={'class': 'span3'})

    for item in siteUrls:
        for img in item.find_all('img'):
            image_id = img.get('data-id')
            image_url = img.get('data-src')
            # Tags missing either attribute cannot be downloaded or named
            # uniquely; skip them instead of crashing on a None URL or
            # overwriting a shared "None.jpg".
            if not image_id or not image_url:
                continue
            urllib.urlretrieve(image_url, '%s/%s.jpg' % (save_dir, image_id))
# Walk listing pages 1 through 6 and download the images found on each one.
for page in xrange(1, 7):
    getContent('http://www.dbmeizi.com/?p=%s' % page)