BeautifulSoup的简单用法
# coding=utf-8
import urllib
import urllib2
import cookielib
from bs4 import BeautifulSoup
import re
# Fetch the page at `url` and print every child node of each
# <div class="qrcode-text"> element found in it.
url = "http://www.baidu.com"
response = None
try:
    # Keep the try body limited to the calls that open the connection.
    request = urllib2.Request(url, data=None)
    response = urllib2.urlopen(request, timeout=2)
except urllib2.HTTPError as e:
    # HTTPError is a subclass of URLError, so it must be handled first.
    print(e.code)
except urllib2.URLError as e:
    print(e.reason)
except Exception:
    # Anything else raised while opening the URL; unlike a bare
    # `except:`, this does not swallow KeyboardInterrupt/SystemExit.
    print("Error")
else:
    # Only reached when the request succeeded, so `response` is always
    # bound here (previously a failed request fell through to a NameError).
    try:
        data = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(data, "lxml")
    # Mix a tag-name filter with an attribute filter (CSS class).
    for tag in soup.find_all('div', class_="qrcode-text"):
        # .children iterates over the direct children of the tag.
        for item in tag.children:
            print(item)
find_all('div',class_="qrcode-text")方法
1、参数可以是name参数,如:'a','div',['a','p'],re.compile('^b'),True等等
2、参数可以是属性,比如:id="link2",href=re.compile('baidu')等等
3、参数还可以是text,用于匹配Tag的string,如text="baidu"(在Beautiful Soup 4.4.0及以后的版本中,该参数名为string,text仍可使用)
4、还可以混合起来使用,如上面程序所示
5、tag.children:表示tag的所有直接子节点,返回的是一个迭代器(可用for循环遍历);如果需要list,请使用tag.contents