Python requests库基本使用介绍

Requests是用Python语言基于urllib3编写的、采用Apache 2.0开源协议的HTTP库。与标准库urllib相比,Requests使用起来更加方便,可以节省大量工作,建议编写爬虫时使用Requests库。

1. 发起基本get请求并传入参数
import requests

def get_html(url, timeout=10):
    """Send a GET request with a fixed query string and print the body.

    The query parameters are handed to requests through ``params`` so the
    library encodes them and appends them to ``url``.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    # Query parameters that end up in the URL's query string.
    param = {"wd": "c++"}
    html = requests.get(url, params=param, timeout=timeout)
    if html.status_code == 200:
        # Decode the body as UTF-8 instead of the charset requests guessed.
        html.encoding = "utf8"
        print(html.text)
    else:
        print("ERROR", html)
if __name__ == "__main__":
    # Script entry point: run the GET demo against the Baidu search URL.
    get_html("http://www.baidu.com/s")
2.发起post请求并提交请求体
# post发送表单
import requests


def post_html(url, timeout=10):
    """POST a small form to ``url`` and print the response body.

    The dict passed as ``data`` is sent as the request body.

    Args:
        url: Address to post to.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    data = {"name": "python", "pwd": "123"}
    html = requests.post(url, data=data, timeout=timeout)
    if html.status_code == 200:
        # Decode the body as UTF-8 instead of the charset requests guessed.
        html.encoding = "utf8"
        print(html.text)
    else:
        print("get error:" + html.url)


if __name__ == "__main__":
    # Script entry point: post the demo form to httpbin.
    post_html("http://httpbin.org/post")

3.获取json数据
# 获取json数据
import requests

def json_html(url, timeout=10):
    """Fetch JSON from ``url`` and print the id and title of each entry.

    The decoded payload is iterated directly and every element is indexed
    with ``'id'`` and ``'title'``.

    Args:
        url: Address returning the JSON document.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    html = requests.get(url, timeout=timeout)
    if html.status_code == 200:
        html.encoding = "utf8"
        for news in html.json():
            print(news['id'], news['title'])
    else:
        print("get error:" + html.url)

if __name__ == "__main__":
    # Script entry point: list the entries of the Tencent news JSON feed.
    json_html("https://news.qq.com/ext2020/apub/json/prevent.new.json")
4.图片下载
import requests
from uuid import uuid4
# Image download
# Request headers passed to every requests call in this snippet: a browser
# User-Agent, a Referer pointing at a Baidu image-search page, and a Cookie.
# NOTE(review): the Cookie value includes a BDUSS entry, which looks like a
# logged-in session token -- confirm, and avoid publishing real credentials.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
                  " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
    "Referer": "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&"
               "st=-1&fr=&sf=1&fmq=1567133149621_R&pv=&ic=0&nc=1&z=0&hd=0&latest=0&copyright=0&se"
               "=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word=%E5%A3%81%E7%BA%B8",
    "Cookie": "BIDUPSID=A9CA40CD7509DCDBCB2358D36CBB2D91; BAIDUID=E0E4E5065F8DB6020266232584DB75EE:FG=1;"
              " PSTM=1595728572; BDUSS=BwUW5IRjlzcHhLV3E0azBpbkdjTHpGbHhIejFLMG1XanIyY3pJTEVDLXUtVVJmRVF"
              "BQUFBJCQAAAAAAAAAAAEAAAB4xj02QWxwYWNhZGFoYdi8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
              "AAAAAAAAAAAAAAAAAAAAAAAAAK5sHV-ubB1ffl; BDRCVFR[xoix5KwSHTc]=9xWipS8B-FspA7EnHc1QhPEUf; de"
              "lPer=0; PSINO=2; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BDRCVFR[X_XKQks0S63]"
              "=mk3SLVN4HKm; userFrom=www.baidu.com; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; firstShowTip=1"
}


def post_url(url, timeout=10):
    """Fetch a JSON listing from ``url`` and download each image it names.

    Despite the name, this issues a GET request. The decoded response is
    read as a mapping whose ``data`` entry is a list of dicts; each dict's
    ``middleURL`` value is passed to ``download``.

    Args:
        url: Address returning the JSON listing.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    html = requests.get(url, headers=headers, timeout=timeout)
    if html.status_code != 200:
        # Report the failure instead of returning silently.
        print("get error:" + html.url)
        return
    html.encoding = "utf8"
    # A missing or null 'data' entry means there is nothing to download.
    content = html.json().get('data') or []
    for item in content:
        # Skip empty entries and entries that carry no image address
        # rather than failing with KeyError.
        image_url = item.get('middleURL') if item else None
        if image_url:
            download(image_url)


def download(url, timeout=10):
    """Save the resource at ``url`` as ``./images/<uuid4>.jpg``.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
        OSError: If the target directory or file cannot be written.
    """
    import os

    # stream=True defers fetching the body, so iter_content below really
    # reads it piece by piece; the with-block releases the connection.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as img:
        print("正在下载{}".format(url))
        if img.status_code != 200:
            # Do not store an error page under a .jpg name.
            print("get error:" + url)
            return
        # open() does not create missing directories.
        os.makedirs("./images", exist_ok=True)
        with open("./images/{}.jpg".format(uuid4()), 'wb') as f:
            # Copy the body to disk in 8 KiB chunks.
            for chunk in img.iter_content(8192):
                f.write(chunk)


if __name__ == "__main__":
    # Script entry point: crawl one page of the Baidu image-search JSON API.
    post_url("https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E5%A3%81%E7%BA%B8&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=0&ic=0&hd=0&latest=0&copyright=0&word=%E5%A3%81%E7%BA%B8&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=wallpaper&pn=30&rn=30&gsm=1e&1595855122224=")

5.使用session维持会话
# Use a Session to keep state (headers, cookies) and reuse connections across requests
import requests
from uuid import uuid4
# Image download
# Request headers installed on the shared session in this snippet: a browser
# User-Agent, a Referer pointing at a Baidu image-search page, and a Cookie.
# NOTE(review): the Cookie value includes a BDUSS entry, which looks like a
# logged-in session token -- confirm, and avoid publishing real credentials.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
                  " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
    "Referer": "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&"
               "st=-1&fr=&sf=1&fmq=1567133149621_R&pv=&ic=0&nc=1&z=0&hd=0&latest=0&copyright=0&se"
               "=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word=%E5%A3%81%E7%BA%B8",
    "Cookie": "BIDUPSID=A9CA40CD7509DCDBCB2358D36CBB2D91; BAIDUID=E0E4E5065F8DB6020266232584DB75EE:FG=1;"
              " PSTM=1595728572; BDUSS=BwUW5IRjlzcHhLV3E0azBpbkdjTHpGbHhIejFLMG1XanIyY3pJTEVDLXUtVVJmRVF"
              "BQUFBJCQAAAAAAAAAAAEAAAB4xj02QWxwYWNhZGFoYdi8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
              "AAAAAAAAAAAAAAAAAAAAAAAAAK5sHV-ubB1ffl; BDRCVFR[xoix5KwSHTc]=9xWipS8B-FspA7EnHc1QhPEUf; de"
              "lPer=0; PSINO=2; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BDRCVFR[X_XKQks0S63]"
              "=mk3SLVN4HKm; userFrom=www.baidu.com; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; firstShowTip=1"
}

# One shared Session for all requests below, so every call made through it
# carries the same headers.
session = requests.Session()
# Merge into the session's existing header set instead of replacing it:
# assigning a plain dict would discard the default headers requests
# installs on a new Session and its case-insensitive lookup.
session.headers.update(headers)

def post_url(url, timeout=10):
    """Fetch a JSON listing through the shared session and download its images.

    Despite the name, this issues a GET request. The decoded response is
    read as a mapping whose ``data`` entry is a list of dicts; each dict's
    ``middleURL`` value is passed to ``download``.

    Args:
        url: Address returning the JSON listing.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    html = session.get(url, timeout=timeout)
    if html.status_code != 200:
        # Report the failure instead of returning silently.
        print("get error:" + html.url)
        return
    html.encoding = "utf8"
    # A missing or null 'data' entry means there is nothing to download.
    content = html.json().get('data') or []
    for item in content:
        # Skip empty entries and entries that carry no image address
        # rather than failing with KeyError.
        image_url = item.get('middleURL') if item else None
        if image_url:
            download(image_url)


def download(url, timeout=10):
    """Save the resource at ``url`` as ``./images/<uuid4>.jpg`` via the session.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
        OSError: If the target directory or file cannot be written.
    """
    import os

    # stream=True defers fetching the body, so iter_content below really
    # reads it piece by piece; the with-block releases the connection.
    with session.get(url, stream=True, timeout=timeout) as img:
        print("正在下载{}".format(url))
        if img.status_code != 200:
            # Do not store an error page under a .jpg name.
            print("get error:" + url)
            return
        # open() does not create missing directories.
        os.makedirs("./images", exist_ok=True)
        with open("./images/{}.jpg".format(uuid4()), 'wb') as f:
            # Copy the body to disk in 8 KiB chunks.
            for chunk in img.iter_content(8192):
                f.write(chunk)


if __name__ == "__main__":
    # Script entry point: crawl one page of the Baidu image-search JSON API.
    post_url("https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E5%A3%81%E7%BA%B8&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=0&ic=0&hd=0&latest=0&copyright=0&word=%E5%A3%81%E7%BA%B8&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=wallpaper&pn=30&rn=30&gsm=1e&1595855122224=")

6.忽略https证书验证
# 忽略https证书验证
import requests

def get_url(url, timeout=10):
    """Request ``url`` without verifying its TLS certificate and report the result.

    Prints ``OK`` for a 200 response and ``Error:<url>`` otherwise.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    # verify=False turns off certificate checking, which is the point of this
    # demo; it removes protection against man-in-the-middle attacks, so do
    # not copy it into code that talks to hosts you need to trust.
    html = requests.get(url, verify=False, timeout=timeout)
    if html.status_code == 200:
        print("OK")
    else:
        print("Error:" + url)

if __name__ == "__main__":
    # Script entry point: probe the demo HTTPS site.
    get_url("https://kennethreitz.org")
7.使用IP代理
# 使用IP代理
import requests

# Proxy address to use for each URL scheme, in the mapping form that the
# ``proxies`` argument of requests accepts.
proxies = dict(
    http="http://118.212.104.230:9999",
    https="http://171.35.221.59:9000",
)


def get_url(url):
    """Request ``url`` through the configured proxies and report the result.

    The call uses the module-level ``proxies`` mapping, a 3-second timeout
    and disabled certificate verification. Prints ``ok`` for a 200 response
    and ``get error:<url>`` for any other status.
    """
    response = requests.get(url, proxies=proxies, timeout=3, verify=False)
    # Guard clause: anything other than 200 is reported as an error.
    if response.status_code != 200:
        print("get error:" + url)
        return
    print("ok")


if __name__ == "__main__":
    # Script entry point: probe the demo HTTPS site through the proxies.
    get_url("https://kennethreitz.org")

8.post上传文件
# post上传文件
import requests

def post_html(url, path='demo1.py', timeout=10):
    """Upload a local file to ``url`` as a multipart form and print the reply.

    The file is opened only for the duration of the upload and is closed
    afterwards, instead of being opened at import time and left open.

    Args:
        url: Address to post to.
        path: Local file to upload; defaults to ``demo1.py``.
        timeout: Seconds to wait for the server. requests waits
            indefinitely when no timeout is passed, so a finite default is
            supplied here.

    Raises:
        OSError: If ``path`` cannot be opened.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    with open(path, 'rb') as fh:
        # Form field 'file' -> (filename, file object, content type,
        # extra per-part headers).
        file = {
            'file': ('demo1', fh, 'application/html', {'Expires': '0'})
        }
        html = requests.post(url, files=file, timeout=timeout)
    if html.status_code == 200:
        html.encoding = "utf8"
        print(html.text)
    else:
        print("get error:" + html.url)


if __name__ == "__main__":
    # Script entry point: upload the demo file to httpbin.
    post_html("http://httpbin.org/post")

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值