爬虫常用的requests库的学习使用

import requests

# Fetch a page and print the basic attributes of the returned Response:
# its type, the HTTP status code, the decoded body and the cookie jar.
response = requests.get('https://www.baidu.com')
body = response.text
print(type(response))
print(response.status_code)
print(type(body))
print(body)
print(response.cookies)
import requests

# Plain GET request; the printed body is the JSON that httpbin sends back.
url = 'http://httpbin.org/get'
response = requests.get(url)
print(response.text)
out:
{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.22.0", 
    "X-Amzn-Trace-Id": "Root=1-5ea294c1-d93fdab007f1da937cc4752d"
  }, 
  "origin": "120.243.219.224", 
  "url": "http://httpbin.org/get"
}
import requests

# GET with query parameters supplied as a dict through `params`;
# requests appends them to the URL as a query string.
query = dict(name='germey', age=22)
response = requests.get('http://httpbin.org/get', params=query)
print(response.text)
out:
{
  "args": {
    "age": "22", 
    "name": "germey"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.22.0", 
    "X-Amzn-Trace-Id": "Root=1-5ea295d4-cbe65534628d5d25cbe08be9"
  }, 
  "origin": "120.243.219.224", 
  "url": "http://httpbin.org/get?name=germey&age=22"
}
import requests
# GET with the query string written directly into the URL.
target = 'http://httpbin.org/get?name=germey&age=22'
response = requests.get(target)
print(response.text)
out:
{
  "args": {
    "age": "22", 
    "name": "germey"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.22.0", 
    "X-Amzn-Trace-Id": "Root=1-5ea2969e-979e9ce4eb21e150a642b7a0"
  }, 
  "origin": "120.243.219.224", 
  "url": "http://httpbin.org/get?name=germey&age=22"
}
# 获取cookie
import requests

# Print the cookie jar of the response, then each cookie as "name=value".
response = requests.get('https://www.zhihu.com')
jar = response.cookies
print(jar)
for name, val in jar.items():
    print("=".join((name, val)))
out:
<RequestsCookieJar[<Cookie _xsrf=tiI9FY2iowanTyCFwEpW1D1Jtr6sickh for .zhihu.com/>]>
_xsrf=tiI9FY2iowanTyCFwEpW1D1Jtr6sickh

模拟登录:

import requests

# Two independent requests.get() calls behave like two different browsers:
# the cookie set by the first call is not sent with the second one, so the
# cookie value set earlier is not returned.
cookies_url = 'http://httpbin.org/cookies'
requests.get(cookies_url + '/set/number/123456789')
response = requests.get(cookies_url)
print(response.text)
out:
{
  "cookies": {}
}
import requests
# Simulated login check: a requests.Session() keeps the conversation alive,
# so the cookie set by the first request is still available to the second.
session = requests.Session()
cookies_url = 'http://httpbin.org/cookies'
session.get(cookies_url + '/set/number/123456789')
response = session.get(cookies_url)
print(response.text)
out:
{
  "cookies": {
    "number": "123456789"
  }
}

证书验证:

import requests

# HTTPS request without a `verify` argument, i.e. with requests' default
# certificate handling.
site = 'https://www.12306.cn'
response = requests.get(site)
print(response.status_code)
out:
200
import requests
from requests.packages import urllib3

# Suppress the warning messages urllib3 would otherwise print.
urllib3.disable_warnings()

# The `verify` argument controls whether the certificate is verified;
# False turns verification off for this request.
site = 'https://www.12306.cn'
response = requests.get(site, verify=False)
print(response.status_code)
out:
200

代理设置:

第一种:
import requests

# Send the request through a local proxy.
#
# The dict KEY is the scheme of the target URL that should be proxied; the
# VALUE is the URL used to reach the proxy itself. Both entries point at the
# same port, so both must use the same proxy protocol. An "https://" proxy
# URL makes recent urllib3 versions open a TLS connection to the proxy, which
# fails against an ordinary HTTP proxy.
# NOTE(review): assumes the proxy on 127.0.0.1:9742 speaks plain HTTP - confirm.
proxies = {
    'http': 'http://127.0.0.1:9742',
    'https': 'http://127.0.0.1:9742',
}
response = requests.get('https://www.taobao.com', proxies=proxies)
print(response.status_code)

第二种:
import requests

# Send the request through a proxy that requires a username and password,
# given in the proxy URL as user:password@host:port.
#
# Proxies are selected by the scheme of the target URL. The request below
# goes to an https:// address, so an 'https' entry is required; with only an
# 'http' entry the proxy would not be used for this request.
proxy_url = 'http://user:password@127.0.0.1:9742'
proxies = {
    'http': proxy_url,
    'https': proxy_url,
}
response = requests.get('https://www.taobao.com', proxies=proxies)
print(response.status_code)


第三种:
pip install requests[socks]

import requests

# Send the request through a SOCKS5 proxy (needs the requests[socks] extra).
# Both target schemes use the same proxy; the 'https' entry previously had a
# malformed address ("127.0.0:9742", missing the last octet).
socks_url = 'socks5://127.0.0.1:9742'
proxies = {
    'http': socks_url,
    'https': socks_url,
}

response = requests.get('https://www.taobao.com', proxies=proxies)
print(response.status_code)

超时设置:

import requests

# GET with a one-second timeout passed through the `timeout` argument.
timeout_seconds = 1
response = requests.get('https://www.taobao.com', timeout=timeout_seconds)
print(response.status_code)
out:
200

异常处理:

import requests
from requests.exceptions import ReadTimeout, ConnectionError, RequestException
# Issue a request with a short timeout and report which kind of failure
# occurred. Handlers are ordered from the most specific exception to the
# general RequestException.
try:
    response = requests.get('http://httpbin.org/get', timeout=0.5)
except ReadTimeout:
    print('Timeout')
except ConnectionError:
    print('Connection error')
except RequestException:
    print('Error')
else:
    # Only reached when the request completed without raising.
    print(response.status_code)
out:
Connection error
  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值