1、环境:系统Win7 x64,Python 2.7。

2、示例代码:

#encoding: utf-8
#author: walker
#date: 2014-11-26
#summary: 使用BeautifulSoup获取url及其内容

import sys, re, requests, urllib
from bs4 import BeautifulSoup

# Python 2 only: switch the interpreter-wide default string encoding to UTF-8.
# sys is reloaded first so that setdefaultencoding can be called here
# (presumably it is not reachable on the already-imported module -- confirm).
# NOTE(review): this changes implicit str/unicode conversions for every module
# in the process, not just this script.
reload(sys)   
sys.setdefaultencoding('utf8')  

#Given a keyword, fetch the Baidu search results and print them
def GetList(keyword):
	"""Search Baidu for ``keyword`` and print every matching result link.

	Args:
		keyword: Search term as a GB18030-encoded byte string; it is decoded
			to unicode before being URL-encoded into the ``wd`` parameter.

	Returns:
		None. The URL-encoded query is printed first; then, for each result
		anchor whose href contains ``baidu.com``, the link text (encoded as
		GB18030) and the href are printed. If the page has no
		``div#content_left`` container, a short notice is printed instead.
	"""
	keyword = unicode(keyword, 'gb18030')
	urlwd = urllib.urlencode({'wd': keyword})
	print(urlwd)
	url = 'http://www.baidu.com/s?ie=utf-8&csq=1&pstg=22&mod=2&isbd=1&cqid=9c0f47b700036f17&istc=8560&ver=0ApvSgUI_ODaje7cp4DVye9X2LZqWiCPEIS&chk=54753dd5&isid=BD651248E4C31919&'
	url += urlwd
	url += '&ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&rsv_pq=b05765d70003b6c0&rsv_t=ce54Z5LOdER%2Fagxs%2FORKVsCT6cE0zvMTaYpqpgprhExMhsqDACiVefXOze4&_ck=145469.1.129.57.22.735.37'
	# Use the session as a context manager so its connections are released
	# as soon as the page has been downloaded.
	with requests.Session() as sn:
		r = sn.get(url=url)
	soup = BeautifulSoup(r.content)		#r.text would likely garble the Chinese text
	container = soup.find('div', id='content_left')
	if container is None:
		# The results container is absent (e.g. an error or verification
		# page came back), so there is nothing to list.
		print('no search results container (div#content_left) in response')
		return
	# Escape the dot so only a literal "baidu.com" in the href matches.
	rtn = container.find_all(name='a', href=re.compile(r'baidu\.com'))
	for item in rtn:
		print(item.getText().encode('gb18030'))
		print(item['href'])
		
if __name__ == '__main__':
	# Script entry point: run a single demo search with the sample keyword.
	GetList('正则表达式')

3、运行结果截图:

[图片:wKiom1R1R6qwvklLAAX4LcZ3AkI940.jpg]


【相关阅读】


*** walker * 2014-11-26 ***