def open_url(self, url, data=None, header=None, encoding='utf-8'):
    """Fetch ``url`` and return its body together with the final URL.

    The request is attempted up to three times.  A gzip-compressed body
    (``Content-Encoding: gzip``) is decompressed, and when the
    ``Content-Type`` header names a charset different from ``encoding``
    the body is transcoded to ``encoding`` on a best-effort basis.

    Args:
        url: Address to request.
        data: Optional request payload handed to ``urllib2.Request``.
        header: Optional mapping of extra request headers.  ``None``
            (the default) means no extra headers.
        encoding: Target encoding of the returned body.

    Returns:
        A ``(body, final_url)`` tuple, where ``final_url`` is the value
        reported by ``response.geturl()``.

    Raises:
        MailNetworkError: If the request cannot be built, every open
            attempt fails, or reading/decompressing the response fails.
    """
    # Build the request object (payload and extra headers included).
    try:
        request = urllib2.Request(url, data, header or {})
    except Exception as exc:
        raise MailNetworkError(exc)

    # Open the page, retrying on any failure.  The error of the last
    # attempt is wrapped exactly once.
    attempts_left = 3
    while True:
        try:
            response = urllib2.urlopen(request)
            break
        except Exception as exc:
            attempts_left -= 1
            if not attempts_left:
                raise MailNetworkError(exc)

    try:
        try:
            # URL that was finally opened.
            final_url = response.geturl()
            # Response headers.
            response_headers = response.headers
            # Read the body in chunks; join once instead of repeated
            # string concatenation.
            chunks = []
            while True:
                chunk = response.read(2048)
                if not chunk:
                    break
                chunks.append(chunk)
            raw_body = b"".join(chunks)
        finally:
            # Release the connection even when reading fails.
            response.close()

        # Undo transport compression if the server gzip-encoded the body.
        content_encoding = response_headers.get('Content-Encoding', '')
        if content_encoding.strip().lower() == 'gzip':
            compressed_stream = StringIO.StringIO(raw_body)
            html_code = gzip.GzipFile(fileobj=compressed_stream).read()
        else:
            html_code = raw_body

        # Transcode the body to the requested encoding when the server
        # declares a different charset.
        content_type = response_headers.get('Content-Type', '')
        charset_match = re.search(r'charset=([^;]*)', content_type.lower())
        if charset_match:
            # The parameter value may be quoted or padded with spaces.
            charset = charset_match.group(1).strip().strip('"\'')
            if charset and charset != encoding:
                try:
                    html_code = html_code.decode(charset).encode(encoding)
                except (LookupError, UnicodeError, TypeError):
                    # Best effort: an unknown charset or undecodable
                    # bytes leave the body untouched.
                    pass
    except Exception as exc:
        raise MailNetworkError(exc)
    return html_code, final_url
urllib2 请求提交完整版
最新推荐文章于 2021-03-18 18:03:47 发布