urllib测试

神迹小卒

于 2022-01-19 12:02:51 发布

阅读量682

点赞数

分类专栏： Python 文章标签： python html 开发语言

本文链接：https://blog.csdn.net/aiqq136/article/details/122577944

版权

Python 专栏收录该内容

39 篇文章 11 订阅

订阅专栏

发送一个 GET 请求

import urllib.request

# Send a plain GET request. The body is never read in this snippet, so the
# context manager is what closes the underlying connection.
with urllib.request.urlopen("http://www.baidu.com") as response:
    # urlopen returns a response object; call response.read() to get the body.
    print(response)

<http.client.HTTPResponse object at 0x000002888933D970>

import urllib.request

# Fetch the page, then turn the raw bytes into text before printing.
response = urllib.request.urlopen("http://www.baidu.com")
raw_body = response.read()
# Decode the fetched bytes as UTF-8 so the Chinese text is readable.
html = raw_body.decode('utf-8')
print(html)

将所有数据保存到一个txt文件后，将后缀改为html，再打开就可以看到百度的界面了

file:///C:/Users/93983/Desktop/新建文本文档.html

发送一个 POST 请求：模拟用户真实登录

测试网站：httpbin.org（下面是在浏览器中向 http://httpbin.org/post 发送请求后返回的内容）

{
  "args": {},
  "data": "",
  "files": {},
  "form": {},
  "headers": {
    "Accept": "application/json",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    "Content-Length": "0",
    "Host": "httpbin.org",
    "Origin": "http://httpbin.org",
    "Referer": "http://httpbin.org/",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:95.0) Gecko/20100101 Firefox/95.0",
    "X-Amzn-Trace-Id": "Root=1-61de3a9d-2ec682387d0423344d03124b"
  },
  "json": null,
  "origin": "117.182.125.206",
  "url": "http://httpbin.org/post"
}

import urllib.request

import urllib.parse  # URL-encoding helpers

# urlencode turns the key/value pairs into a form-encoded string;
# urlopen needs the request body as bytes, so encode it as UTF-8.
form_text = urllib.parse.urlencode({"hello": "world"})
data = form_text.encode("utf-8")
# A POST request must carry data, passed through the data= argument.
response = urllib.request.urlopen("http://httpbin.org/post", data=data)
reply = response.read()
print(reply.decode('utf-8'))

{
"args": {},
"data": "",
"files": {},
"form": {
    "hello": "world"
},
"headers": {
    "Accept-Encoding": "identity",
    "Content-Length": "11",
    "Content-Type": "application/x-www-form-urlencoded",
    "Host": "httpbin.org",
    "User-Agent": "Python-urllib/3.8",        #我们是爬虫
    "X-Amzn-Trace-Id": "Root=1-61de3b8b-332f7b365dd4e94c6e9f5d48"
},
"json": null,
"origin": "117.182.125.206",
"url": "http://httpbin.org/post"
}

访问时间过长，超时处理

import socket
import urllib.error
import urllib.request

try:
    # A very short timeout (0.01 s), so the request is expected to time out.
    with urllib.request.urlopen("http://httpbin.org/get", timeout=0.01) as response:
        print(response.read().decode('utf-8'))
except urllib.error.URLError as e:
    # A timeout while connecting arrives wrapped in URLError (see e.reason).
    # Other URLErrors are not timeouts, so they are reported separately.
    if isinstance(e.reason, socket.timeout):
        print("time out!")
    else:
        print("request failed:", e.reason)
except socket.timeout:
    # A timeout while reading the body is raised directly, not as URLError.
    print("time out!")

time out!

返回状态码

import urllib.request

# Only the status code is inspected and the body is never read, so the
# context manager is what closes the connection.
with urllib.request.urlopen("http://httpbin.org/get") as response:
    print(response.status)

200

import urllib.error
import urllib.request

try:
    # No custom headers are set, so urllib's default User-Agent is sent.
    with urllib.request.urlopen("http://douban.com") as response:
        print(response.status)
except urllib.error.HTTPError as e:
    # urlopen raises HTTPError for an error status instead of returning a
    # response; printing it shows the status line, e.g. "HTTP Error 418: ".
    print(e)

HTTP Error 418: #你被发现是一个爬虫

得到响应表头

import urllib.request

# Only the headers are inspected and the body is never read, so the
# context manager is what closes the connection.
with urllib.request.urlopen("http://www.baidu.com") as response:
    # getheaders() returns a list of (name, value) tuples.
    print(response.getheaders())

[('Bdpagetype', '1'), ('Bdqid', '0xa481ca57001e6640'), ('Cache-Control', 'private'), ('Content-Type', 'text/html;charset=utf-8'), ('Date', 'Wed, 12 Jan 2022 02:34:58 GMT'), ('Expires', 'Wed, 12 Jan 2022 02:34:01 GMT'), ('P3p', 'CP=" OTI DSP COR IVA OUR IND COM "'), ('P3p', 'CP=" OTI DSP COR IVA OUR IND COM "'), ('Server', 'BWS/1.1'), ('Set-Cookie', 'BAIDUID=B5C5DE0D5D5A79F3D3CA8EBABED35998:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com'), ('Set-Cookie', 'BIDUPSID=B5C5DE0D5D5A79F3D3CA8EBABED35998; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com'), ('Set-Cookie', 'PSTM=1641954898; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com'), ('Set-Cookie', 'BAIDUID=B5C5DE0D5D5A79F3C7F8D7876F4D4BDB:FG=1; max-age=31536000; expires=Thu, 12-Jan-23 02:34:58 GMT; domain=.baidu.com; path=/; version=1; comment=bd'), ('Set-Cookie', 'BDSVRTM=0; path=/'), ('Set-Cookie', 'BD_HOME=1; path=/'), ('Set-Cookie', 'H_PS_PSSID=35639_35104_31253_35627_34968_34584_35491_35580_35245_35688_26350_35623_35514_35562; path=/; domain=.baidu.com'), ('Traceid', '1641954898019408308211853978169228813888'), ('Vary', 'Accept-Encoding'), ('Vary', 'Accept-Encoding'), ('X-Frame-Options', 'sameorigin'), ('X-Ua-Compatible', 'IE=Edge,chrome=1'), ('Connection', 'close'), ('Transfer-Encoding', 'chunked')]

尝试访问豆瓣

访问豆瓣所需要的全部信息

用测试网址模拟浏览器访问

import urllib.request
import urllib.parse

# Douban URL kept for reference: https://www.douban.com
# The httpbin.org test endpoint is used here.
url = "http://httpbin.org/post"

# Send a browser User-Agent string in the request headers.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36",
}

# Form-encode the payload, then convert it to bytes for the request body.
form_text = urllib.parse.urlencode({'name': 'eric'})
data = form_text.encode('utf-8')

# Build the request object so it looks like a browser sending a POST.
req = urllib.request.Request(url=url, data=data, headers=headers, method="POST")

# Send the request and print the decoded response body.
response = urllib.request.urlopen(req)
reply = response.read()
print(reply.decode("utf-8"))

{
"args": {},
"data": "",
"files": {},
"form": {
    "name": "eric"
},
"headers": {
    "Accept-Encoding": "identity",
    "Content-Length": "9",
    "Content-Type": "application/x-www-form-urlencoded",
    "Host": "httpbin.org",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36",
    "X-Amzn-Trace-Id": "Root=1-61de4241-466f806f46f051653362756d"
},
"json": null,
"origin": "117.182.125.206",
"url": "http://httpbin.org/post"
}

用get请求访问豆瓣

import urllib.request

url = "https://www.douban.com"

# Send a browser User-Agent string with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36",
}

# No data= and no method= are given, so the Request is sent as a GET.
req = urllib.request.Request(url=url, headers=headers)
response = urllib.request.urlopen(req)
page_bytes = response.read()
print(page_bytes.decode("utf-8"))

神迹小卒

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
urllib测试

获取一个get请求import urllib.requestresponse=urllib.request.urlopen("http://www.baidu.com")print(response) #返回一个对象，可以用read函数解析<http.client.HTTPResponse object at 0x000002888933D970>import urllib.requestresponse=urllib.request.urlopen("http:.
复制链接

扫一扫