base_url = 'http://www.baidu.com/'
# 构造header
ua_list = [
'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 ',
'User-Agent:Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;',
'User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
]
UserAgent = random.choice(ua_list) # 随机一个浏览器
request = urllib2.Request(url=base_url)
# headers = {
# # 'User-Agent' : UserAgent,
# }
request.add_header('User-Agent',UserAgent) #设置一个请求头
print request.get_header('User-agent') # 获取一个请求头,注意大小写
response = urllib2.urlopen(request)
print response.read()
# Anti-scraping defenses typically start with User-Agent filtering, then
# escalate to IP bans (an IP making requests too frequently gets blocked).