09 Requests

pip3 install requests

To verify the installation, run import requests in a Python shell; if no error is reported, the installation succeeded.

A commonly used test site is http://httpbin.org/get.

import requests
response = requests.get('http://www.baidu.com')
print(type(response))
print(response.status_code)
print(type(response.text))
print(response.text)   # page HTML
print(response.cookies)

  

GET requests

import requests
response = requests.get('http://httpbin.org/get') # test page
print(response.text)

  

 

GET request with parameters

# Build the URL query string via the params keyword argument
import requests
data = {
    'name':'liu',
    'age':22
}
response = requests.get('http://httpbin.org/get',params=data)
print(response.text)

Output:

{
  "args": {
    "age": "22", 
    "name": "liu"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.20.0"
  }, 
  "origin": "210.77.180.38", 
  "url": "http://httpbin.org/get?name=liu&age=22"
}

  

Parsing JSON

import requests
import json
response = requests.get('http://httpbin.org/get')
print(response.text)
print(response.json())
print(json.loads(response.text)) # both lines return the same result
print(type(response.json()))  
Output:
{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.20.0"
  }, 
  "origin": "210.77.180.38", 
  "url": "http://httpbin.org/get"
}

{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
<class 'dict'>

  

Fetching binary data

import requests
response = requests.get('http://github.com/favicon.ico')
print(type(response.text),type(response.content)) 
print(response.text)
print(response.content)
Output:

<class 'str'> <class 'bytes'>
........
(the icon's content decoded as text, followed by its raw binary bytes)
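
As an aside not covered in the original: response.text is simply response.content decoded with the encoding requests inferred from the headers, exposed as response.encoding. A minimal sketch, assuming the server's headers let requests infer an encoding:

import requests
response = requests.get('http://www.baidu.com')
print(response.encoding)  # the encoding requests inferred from the response headers
# response.text is response.content decoded with this encoding,
# so the two lines below print the same string
print(response.text[:15])
print(response.content.decode(response.encoding)[:15])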

  

Downloading an image

import requests
response = requests.get('http://github.com/favicon.ico')
with open('favicon.ico','wb') as f:
    f.write(response.content)  # content holds the binary data; the with block closes the file automatically

After running this, the downloaded image can be found in the working directory.
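
For large files it is better not to hold the whole body in memory at once. As a sketch of a standard requests feature (not shown in the original article), stream=True defers the download and iter_content reads it in chunks:

import requests
response = requests.get('http://github.com/favicon.ico', stream=True)  # body is not downloaded yet
with open('favicon.ico', 'wb') as f:
    for chunk in response.iter_content(chunk_size=8192):  # read roughly 8 KB at a time
        f.write(chunk)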

   

# using Zhihu as an example
import requests
response = requests.get('https://www.zhihu.com/explore')
print(response.text)
Output:

<html>
<head><title>400 Bad Request</title></head>
<body bgcolor="white">
<center><h1>400 Bad Request</h1></center>
<hr><center>openresty</center>
</body>
</html>

Access it by adding headers:

import requests
headers = {
    'User-Agent': '...'  # supply your own browser User-Agent string here
}
response = requests.get('https://www.zhihu.com/explore',headers=headers)
print(response.text)

  

Basic POST request

import requests
data = {
    'name':'liu',
    'age':22
}
response = requests.post('http://httpbin.org/post',data = data)
print(response.text)
Output:

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "age": "22", 
    "name": "liu"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Content-Length": "15", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.20.0"
  }, 
  "json": null, 
  "origin": "210.77.180.38", 
  "url": "http://httpbin.org/post"
}
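
Besides form data, requests can also send a JSON body directly through the json keyword argument, which serializes the dict and sets the Content-Type header automatically. A minimal sketch, not from the original article:

import requests
data = {'name': 'liu', 'age': 22}
response = requests.post('http://httpbin.org/post', json=data)  # sends Content-Type: application/json
print(response.json()['json'])  # httpbin echoes the parsed JSON body back under the "json" key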

Adding headers works the same way as with GET:

import requests
data = {
    'name':'liu',
    'age':22
}
headers = {'User-Agent': '...'}  # supply your own User-Agent string here
response = requests.post('http://httpbin.org/post',data = data,headers=headers)
print(response.json())

  

Responses

Response attributes

import requests
response = requests.get('http://www.baidu.com')
print(type(response.status_code),response.status_code)
print(type(response.headers),response.headers)
print(type(response.cookies),response.cookies)
print(type(response.url),response.url)
print(type(response.history),response.history)
Output:

<class 'int'> 200
<class 'requests.structures.CaseInsensitiveDict'> {'Content-Type': 'text/html', 'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform', 'Transfer-Encoding': 'chunked', 'Server': 'bfe/1.0.8.18', 'Content-Encoding': 'gzip', 'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/', 'Last-Modified': 'Mon, 23 Jan 2017 13:27:36 GMT', 'Date': 'Thu, 08 Nov 2018 07:18:47 GMT', 'Pragma': 'no-cache', 'Connection': 'Keep-Alive'}
<class 'requests.cookies.RequestsCookieJar'> <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
<class 'str'> http://www.baidu.com/
<class 'list'> []

  

Checking the status code

import requests
response = requests.get('http://www.baidu.com')
exit() if not response.status_code == requests.codes.ok else print('Success')
exit() if not response.status_code == 200 else print('Success')  # the literal status code 200 works just as well
Output:
Success
Success
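
Alternatively, the Response object offers raise_for_status(), which raises an HTTPError for any 4xx/5xx status code. A minimal sketch:

import requests
from requests.exceptions import HTTPError
response = requests.get('http://www.baidu.com')
try:
    response.raise_for_status()  # raises HTTPError for 4xx/5xx responses, does nothing for 2xx
    print('Success')
except HTTPError as e:
    print('Request failed:', e)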

  

Advanced usage

File upload

import requests
files = {'file':open('favicon.ico','rb')}
response = requests.post('http://httpbin.org/post',files = files)
print(response.text)
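
The value in the files dict may also be a tuple, which sets the filename and content type of the multipart field explicitly. A brief sketch of this documented requests feature, not part of the original article:

import requests
# (filename, file object, content type)
files = {'file': ('favicon.ico', open('favicon.ico', 'rb'), 'image/x-icon')}
response = requests.post('http://httpbin.org/post', files=files)
print(response.status_code)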

 

Getting cookies

import requests
response = requests.get('http://www.baidu.com')
print(response.cookies)
for key,value in response.cookies.items():
    print(key + '='+ value)

Output:

<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
BDORZ=27315
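
Cookies can also be sent with a request by passing a plain dict to the cookies keyword argument. A minimal sketch, not in the original article:

import requests
response = requests.get('http://httpbin.org/cookies', cookies={'number': '123456'})
print(response.text)  # httpbin echoes the cookies it received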

  

Session persistence

Simulating a login

import requests
requests.get('http://httpbin.org/cookies/set/number/123456') # set a cookie
response = requests.get('http://httpbin.org/cookies')
print(response.text)
Output:

{
  "cookies": {}
}

# the returned cookies are empty

The cookies come back empty because the request that set the cookie and the request that read it were two independent connections, effectively two separate browsers. To share cookies across requests, use the Session object from the requests library.

A Session object issues both GET requests from the same "browser":

 

import requests
s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456')
response = s.get('http://httpbin.org/cookies')
print(response.text)
Output:

{
  "cookies": {
    "number": "123456"
  }
}
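
A Session can also carry default headers that are sent with every request it makes. A minimal sketch, not from the original article; the User-Agent value is just an illustrative placeholder:

import requests
s = requests.Session()
s.headers.update({'User-Agent': 'my-crawler/0.1'})  # placeholder UA, sent on every request through s
response = s.get('http://httpbin.org/headers')
print(response.text)  # httpbin echoes the request headers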

  

Certificate verification

import requests
response = requests.get('https://www.12306.cn')  # at the time of writing, 12306's certificate failed verification, so this raised an SSLError
print(response.status_code)

  

 

import requests
response = requests.get('https://www.12306.cn',verify=False)  # verify=False disables certificate verification (the default is True)
print(response.status_code)
Output:
200
c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)
c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)

Since warnings are still emitted, disable them through urllib3:

import requests
from requests.packages import urllib3
urllib3.disable_warnings()  # silence the InsecureRequestWarning
response = requests.get('https://www.12306.cn',verify=False)  # verify=False disables certificate verification (the default is True)
print(response.status_code)

Output:
200

  

Supplying a local certificate

import requests
response = requests.get('https://www.12306.cn', cert=('/path/server.crt', '/path/key'))
# with the local certificate paths supplied, no warning is raised

  

Proxy settings

import requests
proxies = {
    'http': 'http://<proxy address>',
    'https': 'https://<proxy address>'
}
response = requests.get('http://xxxxx.com',proxies=proxies)
print(response.status_code)

 

# with a username and password
import requests
proxies = {
    'http': 'http://user:password@<proxy address>',
    'https': 'https://user:password@<proxy address>'
}
response = requests.get('http://xxxxx.com',proxies=proxies)
print(response.status_code)

  

# for proxies that are not HTTP or HTTPS, use a SOCKS proxy
# pip3 install 'requests[socks]'
import requests
proxies = {
    'http': 'socks5://<proxy address>',
    'https': 'socks5://<proxy address>'
}
response = requests.get('http://xxxxx.com', proxies=proxies)
print(response.status_code)

  

Timeout settings

import requests
response = requests.get('https://www.taobao.com',timeout = 1) # the server must respond within one second
print(response.status_code)

  

If the site does not respond within one second, a timeout error is raised; the Exception handling section below shows how to catch it so the program can continue.
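
timeout also accepts a (connect, read) tuple, so the connection and read timeouts can be set separately. A minimal sketch, not from the original article:

import requests
# 3 seconds to establish the connection, 7 seconds to receive the response
response = requests.get('https://www.taobao.com', timeout=(3, 7))
print(response.status_code)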

Exception handling

import requests
from requests.exceptions import ReadTimeout
try:
    response = requests.get('https://httpbin.org/get',timeout = 0.5)
    print(response.status_code)
except ReadTimeout:
    print('Timeout')
Output:
Timeout

# the try block catches the timeout exception

  

Authentication: for sites that require a username and password

import requests
from requests.auth import HTTPBasicAuth
r = requests.get('http://.123..23',auth = HTTPBasicAuth('user','123'))
print(r.status_code)
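
requests also accepts a plain (user, password) tuple for auth, which is shorthand for HTTPBasicAuth. A minimal sketch using the same placeholder URL:

import requests
r = requests.get('http://.123..23', auth=('user', '123'))  # bare tuple is shorthand for HTTPBasicAuth
print(r.status_code)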

 

Multiple exception types can be caught in order, ending with RequestException, the base class for all exceptions raised by requests:

import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException
try:
    response = requests.get('http://httpbin.org/get',timeout = 0.6)
    print(response.status_code)
except ReadTimeout:
    print('Timeout')
except HTTPError:
    print('HTTPError')
except RequestException:
    print('Error')

  

 

Reposted from: https://www.cnblogs.com/liupingtao/p/9929728.html
