import bs4
from bs4 import BeautifulSoup
import re
import sys
import os
import chardet
import urllib2
# Fetch a zhulang.com search-results page and collect the anchors that link
# to a book index page (/Book/<digits>/Index.html) whose link text is the
# wanted title.

# Search URL; the k= parameter carries the percent-encoded search keyword.
line = 'http://so.zhulang.com/search.php?k=%E9%BB%91%E9%81%93&t=zh&allmatch=1'
# Send a browser-like User-Agent header with the request.
headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
request = urllib2.Request(line, headers=headers)
r = urllib2.urlopen(request)
try:
    content = r.read()
finally:
    # Release the HTTP connection even if reading the body fails.
    r.close()

# BeautifulSoup accepts the markup as a string. The parser is named
# explicitly so the parse tree does not depend on which optional parsers
# happen to be installed.
soup = BeautifulSoup(content, 'html.parser')
head = soup.head
aline = soup.find_all('a')

regex = '.*'
# UTF-8 bytes of the title being searched for (U+906E U+5929). Written with
# escapes so the bytes do not depend on the encoding this file is saved in.
title = u'\u906e\u5929'.encode('utf-8')
# Compiled once, outside the loop. The dot in "Index.html" is escaped so it
# only matches a literal '.'.
pattern = re.compile(regex + r'/Book/[0-9]+/Index\.html' + regex + '>' + title + '<')

result = []
# A dedicated loop variable keeps `line` holding the search URL.
for anchor in aline:
    markup = anchor.encode('utf-8')
    print(markup)
    print(anchor.text)
    url = pattern.findall(markup)
    if url:
        # Each entry of `result` is the list of matches found in one anchor.
        result.append(url)
# BeautifulSoup getting-started code
# Latest recommended article published 2023-05-14 09:00:00