python的http请求和应答_【python】获取http响应

最新推荐文章于 2024-04-22 14:03:57 发布

weixin_39554775

最新推荐文章于 2024-04-22 14:03:57 发布

阅读量364

点赞数

文章标签： python的http请求和应答

# coding=utf8
"""Fetch the home page served at an ip:port pair and summarise the response.

The module runs on both Python 2.7 and Python 3.
"""

import contextlib
import gzip
import io
import re
import socket
import traceback
import zlib

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: urllib.request exposes every name used below
    import urllib.request as urllib2

try:
    import chardet
except ImportError:  # optional: only used as the last-resort encoding guess
    chardet = None


# The body is handled as bytes, so the HTML patterns are bytes patterns.
_TITLE_RE = re.compile(br'<title[^>]*>(.*?)</title>', re.I | re.S)
_META_CHARSET_RE = re.compile(
    br'<meta[^>]*?charset\s*=\s*["\']?\s*([\w.:-]+)', re.I)
_HEADER_CHARSET_RE = re.compile(r'charset\s*=\s*["\']?\s*([\w.:-]+)', re.I)


def plugin_homepage(data, timeout):
    """Request ``/`` on the host described by *data* and report the result.

    Args:
        data: mapping with the keys ``"ip"`` and ``"port"``.
        timeout: socket timeout in seconds, passed through to ``get_html``.

    Returns:
        A dict with the keys ``ip``, ``port``, ``rsp_header``, ``rsp_body``,
        ``code``, ``title``, ``is_timeout`` and ``error_reason``.
    """
    ip = data["ip"]
    port = data["port"]
    # Port 443 is the only port treated as TLS; accept it as int or string.
    scheme = "https" if str(port) == "443" else "http"
    url = "%s://%s:%s/" % (scheme, ip, port)

    is_timeout, error_reason, code, header, body, title = get_html(url, timeout)
    return {
        "ip": ip,
        "port": port,
        "rsp_header": header,
        "rsp_body": body,
        "code": code,
        "title": title,
        "is_timeout": is_timeout,
        "error_reason": error_reason,
    }


def get_html(url, timeout):
    """Fetch *url* and return a summary of the response.

    HTTP error responses (4xx/5xx) are still processed: their status code,
    headers and body are returned together with ``error_reason``.

    Args:
        url: the URL to open.
        timeout: socket timeout in seconds.

    Returns:
        A tuple ``(is_timeout, error_reason, code, header, body, title)``:
        ``is_timeout`` is True when the request failed because it timed out;
        ``error_reason`` is a message, or None on success; ``code`` is the
        status code, or None; ``header`` is the response headers rendered as
        text, or None; ``body`` is the (gunzipped) body as a byte string,
        re-encoded to UTF-8 when its encoding could be determined, or None;
        ``title`` is the stripped contents of the first ``<title>`` element
        as a byte string, or None.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    request_headers = {'User-Agent': user_agent}

    is_timeout = False
    error_reason = None
    code = None
    headers = None
    body = None

    try:
        request = urllib2.Request(url, headers=request_headers)
        with contextlib.closing(
                urllib2.urlopen(request, timeout=timeout)) as response:
            code = response.getcode()
            # Keep the message object: the lookups below need mapping access,
            # it is only rendered to text for the return value.
            headers = response.headers
            body = response.read()
    except urllib2.HTTPError as e:
        # The server answered with an error status; keep what it sent.
        error_reason = str(e)
        code = e.code
        headers = e.headers
        try:
            body = e.read()
        except Exception:
            # Status and headers are still worth reporting without a body.
            traceback.print_exc()
            body = None
        finally:
            e.close()
    except urllib2.URLError as e:
        traceback.print_exc()
        error_reason = str(e.reason)
        is_timeout = _is_timeout_reason(e.reason)
        return is_timeout, error_reason, code, _header_text(headers), body, None
    except socket.timeout as e:
        # Timeouts while reading the body are not wrapped in URLError.
        traceback.print_exc()
        error_reason = str(e)
        return True, error_reason, code, _header_text(headers), body, None
    except Exception as e:
        traceback.print_exc()
        error_reason = str(e)
        return is_timeout, error_reason, code, _header_text(headers), body, None

    header_text = _header_text(headers)
    if not body:
        return is_timeout, error_reason, code, header_text, body, None

    body = _gunzip_if_needed(headers, body)
    body = _to_utf8(headers, body)
    title = _extract_title(body)
    return is_timeout, error_reason, code, header_text, body, title


def _header_text(headers):
    """Render a headers object as text, passing None through unchanged."""
    return None if headers is None else str(headers)


def _is_timeout_reason(reason):
    """Tell whether a URLError reason describes a timeout."""
    return isinstance(reason, socket.timeout) or "timed out" in str(reason)


def _gunzip_if_needed(headers, body):
    """Decompress *body* when Content-Encoding says gzip; else return it as is."""
    content_encoding = headers.get('Content-Encoding') or ''
    if 'gzip' not in content_encoding.lower():
        return body
    try:
        return gzip.GzipFile(fileobj=io.BytesIO(body)).read()
    except (IOError, EOFError, zlib.error):
        # Mislabelled or truncated data: fall back to the raw bytes.
        return body


def _to_utf8(headers, body):
    """Re-encode *body* to UTF-8 when its encoding is known and valid."""
    encoding = get_encode(headers, body)
    if not encoding:
        return body
    try:
        return body.decode(encoding.strip()).encode('utf-8')
    except (LookupError, ValueError):
        # Unknown codec name or undecodable bytes: keep the body untouched.
        return body


def _extract_title(body):
    """Return the stripped contents of the first <title> element, or None."""
    match = _TITLE_RE.search(body)
    return match.group(1).strip() if match else None


def get_encode(header, body):
    """Work out the character encoding of an HTML response.

    Sources are tried in this order: a ``charset=`` inside a ``<meta>`` tag
    of *body*, the ``charset=`` parameter of the Content-Type header, then a
    guess from ``chardet`` when that package is installed.

    Args:
        header: response headers offering ``.get(name)`` (a message object
            or a dict); anything else is ignored.
        body: the response body as a byte string.

    Returns:
        The encoding name, or None when it cannot be determined.
    """
    has_bytes = isinstance(body, bytes) and bool(body)

    if has_bytes:
        match = _META_CHARSET_RE.search(body)
        if match:
            # The pattern only captures ASCII, so this decode cannot fail.
            return str(match.group(1).decode('ascii'))

    get_header = getattr(header, 'get', None)
    if get_header is not None:
        match = _HEADER_CHARSET_RE.search(get_header('Content-Type') or '')
        if match:
            return match.group(1)

    if has_bytes and chardet is not None:
        return chardet.detect(body)['encoding']
    return None


if __name__ == "__main__":
    data = {"ip": "127.0.0.1", "port": 80}
    res = plugin_homepage(data, 3)
    print(res)

weixin_39554775

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python的http请求和应答_【python】获取http响应

#coding=utf8importurllib2importchardetimporttracebackimportStringIOimportreimportgzipdefplugin_homepage(data, timeout):ip= data["ip"]port= data["port"]if port == 443:url= "https://%s:%s/" %(ip, port)e...
复制链接

扫一扫