python的http请求和应答_【python】获取http响应

#coding=utf8

"""Fetch the home page of an ip:port pair and summarise the HTTP response.

Runs on Python 2.6+ and Python 3. Bodies and titles are returned as UTF-8
encoded byte strings (``str`` on Python 2, ``bytes`` on Python 3).
"""

import codecs
import gzip
import io
import re
import socket
import traceback
import zlib

try:  # Python 2
    from urllib2 import HTTPError, Request, URLError, urlopen
except ImportError:  # Python 3
    from urllib.error import HTTPError, URLError
    from urllib.request import Request, urlopen

try:
    import chardet
except ImportError:  # optional: only used as the last-resort charset guess
    chardet = None


USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

# NOTE(review): the tag-like parts of the two HTML patterns below were missing
# from the published copy of this script (they look like they were stripped as
# markup). They are reconstructions -- confirm against the original source.
_TITLE_RE = re.compile(br'<title[^>]*>(.*?)</title>', re.I | re.S)
_META_CHARSET_RE = re.compile(
    br'<meta[^>]*?charset\s*=\s*["\']?\s*([A-Za-z0-9_.:-]+)', re.I)
_HEADER_CHARSET_RE = re.compile(r'charset\s*=\s*["\']?([^\s;"\']+)', re.I)


def plugin_homepage(data, timeout):
    """Request ``/`` on ``data["ip"]:data["port"]`` and describe the response.

    Args:
        data: mapping with the keys ``"ip"`` and ``"port"``. Port 443 (int or
            string) is fetched over https, every other port over http.
        timeout: timeout in seconds, passed through to ``get_html``.

    Returns:
        dict with the keys ``ip``, ``port``, ``rsp_header``, ``rsp_body``,
        ``code``, ``title``, ``is_timeout`` and ``error_reason``.
    """
    ip = data["ip"]
    port = data["port"]
    scheme = "https" if str(port) == "443" else "http"
    url = "%s://%s:%s/" % (scheme, ip, port)

    is_timeout, error_reason, code, header, body, title = get_html(url, timeout)
    return {
        "ip": ip,
        "port": port,
        "rsp_header": header,
        "rsp_body": body,
        "code": code,
        "title": title,
        "is_timeout": is_timeout,
        "error_reason": error_reason,
    }


def get_html(url, timeout):
    """Fetch ``url`` and return a summary tuple; never raises on fetch errors.

    Args:
        url: absolute URL to request.
        timeout: timeout in seconds handed to ``urlopen``.

    Returns:
        ``(is_timeout, error_reason, code, header, body, title)`` where

        * ``is_timeout`` is True when the request failed because it timed out,
        * ``error_reason`` is None on success, otherwise the error text,
        * ``code`` is the HTTP status code (also set for HTTP error responses),
        * ``header`` is the response header block as text, or None,
        * ``body`` is the response body, gunzipped and re-encoded to UTF-8
          when the charset could be determined, or None,
        * ``title`` is the content of the first ``<title>`` element with
          surrounding whitespace removed, or None.
    """
    is_timeout = False
    error_reason = None
    code = None
    header = None
    body = None
    title = None

    try:
        request = Request(url, headers={'User-Agent': USER_AGENT})
        response = urlopen(request, timeout=timeout)
        try:
            code = response.getcode()
            # Keep the message object: the post-processing below needs
            # per-header lookups, which a pre-stringified header cannot give.
            header = response.headers
            body = response.read()
        finally:
            response.close()
    except HTTPError as e:
        # 4xx/5xx answers still carry a status, headers and usually a body,
        # so record them and fall through to the normal post-processing.
        error_reason = str(e)
        code = e.code
        header = e.headers
        try:
            body = e.read()
        except Exception:  # best effort: the error body is optional
            traceback.print_exc()
            body = None
    except URLError as e:
        traceback.print_exc()
        error_reason = str(e.reason)
        if isinstance(e.reason, socket.timeout) or "timed out" in error_reason:
            is_timeout = True
        return is_timeout, error_reason, code, _header_text(header), body, title
    except socket.timeout as e:
        # Timeouts while reading the body are raised bare, not as URLError.
        traceback.print_exc()
        is_timeout = True
        error_reason = str(e)
        return is_timeout, error_reason, code, _header_text(header), body, title
    except Exception as e:
        traceback.print_exc()
        error_reason = str(e)
        return is_timeout, error_reason, code, _header_text(header), body, title

    if not header:
        return is_timeout, error_reason, code, _header_text(header), body, title

    body = _gunzip_if_needed(header, body)
    body = _to_utf8(header, body)
    title = _extract_title(body)
    return is_timeout, error_reason, code, str(header), body, title


def get_encode(header, body):
    """Work out the character encoding of an HTML response.

    Sources are tried in this order: a ``<meta ... charset=...>`` declaration
    in ``body``, the ``charset`` parameter of the ``Content-Type`` header,
    and finally ``chardet`` when that package is installed.

    Args:
        header: mapping-like header object offering ``get`` (for example the
            message object of a response, or a dict); anything else is
            treated as "no headers".
        body: raw response body as a byte string.

    Returns:
        The encoding name as a native string, or None if it is unknown.
    """
    if body:
        try:
            match = _META_CHARSET_RE.search(body)
        except TypeError:  # body is text rather than bytes; nothing to sniff
            match = None
        if match:
            return _native(match.group(1))

    try:
        content_type = header.get('Content-Type')
    except AttributeError:  # header is None or a plain string
        content_type = None
    if content_type:
        match = _HEADER_CHARSET_RE.search(content_type)
        if match:
            return match.group(1)

    if chardet is None or not body:
        return None
    return chardet.detect(body)['encoding']


def _header_text(header):
    """Return the header block as text, keeping None as None."""
    return None if header is None else str(header)


def _native(value):
    """Return an ASCII byte string as the interpreter's native ``str``."""
    if isinstance(value, str):
        return value
    return value.decode('ascii', 'ignore')


def _gunzip_if_needed(header, body):
    """Decompress ``body`` when Content-Encoding says gzip, else return it."""
    content_encoding = header.get('Content-Encoding') or ''
    if not body or 'gzip' not in content_encoding.lower():
        return body
    try:
        return gzip.GzipFile(fileobj=io.BytesIO(body)).read()
    except (IOError, OSError, EOFError, zlib.error):
        # Mislabelled or truncated payload: hand back the bytes untouched.
        return body


def _to_utf8(header, body):
    """Re-encode ``body`` to UTF-8, or return it unchanged if that fails."""
    if not body:
        return body
    charset = get_encode(header, body)
    if not charset:
        return body
    try:
        # Validate the sniffed name before use; it may be junk from the page.
        codec_name = codecs.lookup(charset.strip()).name
        return body.decode(codec_name).encode('utf-8')
    except (LookupError, UnicodeError, ValueError, TypeError):
        return body


def _extract_title(body):
    """Return the stripped content of the first <title> element, or None."""
    if not body:
        return None
    match = _TITLE_RE.search(body)
    return match.group(1).strip() if match else None


if __name__ == "__main__":
    data = {"ip": "127.0.0.1", "port": 80}
    res = plugin_homepage(data, 3)
    print(res)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值