requests的使用

一· 概念

requests作为爬虫的基础库,在我们快速爬取和反爬破解中起到很重要的作用, 其中的知识点大概有以下几个方面:

二· 内容

一, request:

1- requests.get
get请求获取数据
2- requests.post
post请求获取数据

二, response:

1- response.text
响应体str类型
2- response.encoding
根据HTTP响应头推测出的响应内容编码方式, response.text 会使用它来解码响应体
3- response.content
响应体bytes类型
4- response.status_code
响应状态码
5- response.request.headers
响应对应的请求头
6- response.headers
响应头
7- response.request._cookies
响应对应请求的cookie
8- response.cookies
响应的cookie(经过了set-cookie动作)
9- response.url
获取访问的url
10- response.json()
获取json数据 得到的内容为字典(如果接口响应体的格式是json格式时)
11- response.ok
如果status_code 小于400则返回True, 否则返回False

示例1- requests发送get请求获取json数据

1- 使用requests获取json数据。 配置cookie和user-agent

import requests
import json
from lxml import etree
import pandas as pd



def get_pro(url, timeout=30):
    """Fetch a product listing as JSON and flatten it into table rows.

    Sends a GET request to ``url`` with a browser-like header set (including
    a captured session cookie), decodes the body as UTF-8 JSON and reads the
    ``"ParametricResults"`` list from it.

    Args:
        url: Address of the JSON endpoint to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one ``[name, type, svg, description]`` row per product;
        a field is ``None`` when the product entry lacks the matching key.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        KeyError: The JSON payload has no ``"ParametricResults"`` entry.
    """
    # The cookie is a single header value. It is written as adjacent string
    # literals so that no literal spans a physical line break: a newline
    # inside the literal is a syntax error and is not valid in a header.
    # NOTE(review): these look like captured session values and have
    # presumably expired; refresh them from a browser session if the
    # request is rejected.
    cookie = (
        "user_pref_language='zh-CN'; user_pref_currency='USD'    ; last-domain=www.ti.com.cn; CONSENTMGR=ts:1663841657843%7Cconsent:true; tiSessionID=018364b0e3f4001314def085217c0506d001606500bd0; Qs_lvt_470423=1663841658; _gid=GA1.3.938983574.1663841661; _ga=GA1.3.1093917085.1663841661; _pxvid=53ce593f-3a5f-11ed-9086-4a5457546470; _gcl_au=1.1.1202026322.1663841671; __adroll_fpc=98c78cff267aae66ab4a0d83ec58a750-1663841671904; ELOQUA=GUID=4BEDAA278D4541A38864991D291B83A8; __ar_v4=QFXRHQEHOJDMLHSLFIWCLO%3A20220922%3A4%7C2XNKMR6P4VGD5MD3ZP4SQR%3A20220922%3A4%7CG3YHLXUICZC3XKDXYVVLO4%3A20220922%3A4; bm_sz=D41B60D0ECA1409E6269C65B0E1D8A7D~YAAQe0InO1HJUV2DAQAAlX7GZBEy8cD3cfIKOSojZGgbg/3W35pnD/IimJLSz2DPb7V4nP2QVr7dSbziplcts1ToAzEN/hKl3KP3iRIEhDrbuzwG09lnQJKByFgRMRcffhpJwlCavomYd+qST6ol+2b0i3yJ66K31G7ZcHnVKbOFttH7IPmHWgC57Nez5vjYNuyrA3SFHcPfDsM9qQNyGrF4XXGzZ5AJmFHPtm2mtD2G3dpvpQwoIjXMAfMf3c2X/ZlUxmwf3O16rYbZFr7fyIemhONdLmWmd4zCGJjoO6u65bfeO26XuSsSgF1gvPdKDs1CZ9L+xLO/GKuVQdRmLC9gDfnw0HfpoZCLcDKCW1aSCqCy2kcZuIaHzVU42qXUC1kwLKgNGS9R3jYKWI1nM6X9I0B1Q2UGqpxXN1sVMoyucRK5Fw==~3424563~3617078; ti_geo=country=CN|city=GUANGZHOU|continent=AS|tc_ip=121.35.2.123; ti_ua=Mozilla%2f5.0%20(Windows%20NT%2010.0%3b%20WOW64)%20AppleWebKit%2f537.36%20(KHTML,%20like%20Gecko)%20Chrome%2f96.0.4664.93%20Safari%2f537.36; ti_bm=; userType=Anonymous; gpn=Non-Product; mediav=%7B%22eid%22%3A%221171998%22%2C%22ep%22%3A%22%22%2C%22vid%22%3A%22V%3EV(o0%3EVk*9X'pHa%5E5wL%22%2C%22ctn%22%3A%22%22%2C%22vvid%22%3A%22V%3EV(o0%3EVk*9X'pHa%5E5wL%22%2C%22_mvnf%22%3A1%2C%22_mvctn%22%3A0%2C%22_mvck%22%3A0%2C%22_refnf%22%3A1%7D; _gat_ga_main_tracker=1; pxcts=84a68a88-3a70-11ed-98a4-4b4e476b5057; _pxff_cc=U2FtZVNpdGU9TGF4Ow==; "
        "bm_mi=E72E8E2998A099B60812D95CC458923B~YAAQZ8U8t1xK22GDAQAACK8hZRFEG3hOh/qD2QOoKt5PooKubQFRUJwuU41ZsXh5AxXscvosxmKQUwVzWiXMd3GpsKORnhGzlpqE3eoqYFIvpIRH+rmKPRVmJ5IAzT3q1UShVrDcNrLIrHA16d1iyZUOMuhpkGVlsWwU8UlswgIQYaKb0DSqoF7a3C1gbgZIQgs/pPqQx0K9GnFZgXFbm8yXck1FKFMZ8ijjhO020Auuaty+Yg88DUFThnVAl6nYI+GxNvUkTbAKD0lY5T3AQtZpPtOkcfbHmqC0gk+f2wBCGe1YlACR3G6zaVTcjLtjLK4eDBuOoUlHwtGjTsTRLGNHUrIfM6uPJdEPl0eLqGoLDafDRqgnV7o3nll4mmLpBLUtTTc=~1; ticontent=%2Fanalog%20and%20mixed-signal%2Fclocks%20%26%20timing%2Freal-time%20clocks%20(rtcs)%20%26%20timers; ga_content_cookie=%2Fanalog%20and%20mixed-signal%2Fclocks%20%26%20timing%2Freal-time%20clocks%20(rtcs)%20%26%20timers; da_lid=BCDD00D89A72EA16BCE2BB99F530830CBE|0|0|0; da_sid=8FEE33EB8E32AE8D29B3AA13B732C9070D|4|0|3; da_intState=; ak_bmsc=7976EE9DF88BE800A430E7FF6F5C60CB~000000000000000000000000000000~YAAQZ8U8t3tL22GDAQAANLUhZRHMmX+f/4J2VKsWUd57hlQx2SIQlCFMxujU0UFRHEsYIHVWbs99CiJl1Mf66ONAzkD42jge+OPSQtMOPwrthxtHjt+ioAy+xT7+wJBPVuJZ/ONitbO0Sdbb5YPTtOmVs/yKk3aJuF5Gyk3kf7cDo5Q3PUQBREpgVqMZRlszTzB6T3V0Fzv+vtB9BKDlNX9T24GfOal8SW6OOdLMtbx/BIMKflbTbTAsPFllIvEdhU97Su9U9T+X44IO0C1yGm4xUOffCB+20fnKxxa0fB0OC71r6snUflU57iw1m93MrgYtET7SSKVE5hdjtL4JMPQaoPLN0URPPV3wnh0K662XTZ10JHOdfUmFSFwcqLMR1KAZaryVN6deYbhqJ5rQXw3MEbzlNLY3P9kjLYorpc1CjQske9Yx57Qaq8a3EG8DLZY6VK/XFyfKSDlFglPFXYULEHs=; Qs_pv_470423=2745392645362471000%2C4285938632797885400%2C2810462465018459600%2C4189095345180553000%2C1243611918178687700; tipage=%2Fanalog%20and%20mixed-signal%2Fclocks%20%26%20timing%2Freal-time%20clocks%20(rtcs)%20%26%20timers%2Freal-time%20clocks%20(rtcs)%20%26%20timers%20products-cn; tipageshort=real-time%20clocks%20(rtcs)%20%26%20timers%20products-cn; ga_page_cookie=real-time%20clocks%20(rtcs)%20%26%20timers%20products-cn; ABTasty=uid=0cjpavvk9wnrrrwk&fst=1663841660388&pst=1663841660388&cst=1663847478322&ns=2&pvt=22&pvis=4&th=; "
        "ABTastySession=mrasn=&sen=3&lp=https%253A%252F%252Fwww.ti.com.cn%252Fzh-cn%252Famplifier-circuit%252Finstrumentation%252Fproducts.html%2523p358max%253D24%253B50; _px3=3dc91279d0f8be4de87bd7429a62ac3a4e47bb76cd18485670dd2d24a9a77bb3:YbVcwdKJ6VKjcXrCrW1deAz4l6c6cwWvF5v68WZ9NctY3yZTFx5cuS0OSmEymhyMARn06B+Se8hjomWxt7FA/A==:1000:Ac5xSCH0xsEmWWCdY8cmnbVlJ5Y8PSSQmfJX+GuwEBt2B46eXGMFTN4I3xo85r8BPsghdKd2Qv+wnrdc2YAL1QQ+edMB0ZVsZP2gIOY9nqz1sBuKUNKY0V8tYF7ku5xY3yXJEUaQlZ11GZj9ruyK5G6YI54os3F5xLyDfw1+F0a3mXS+wfavzGz6IKYN1Gz1khRoKEozKIFLbaAkX0p+Pg==; _pxde=5ac0c31fbd77e81c5a08c6e00a7b31ef5f69788bfc933f05dd1f64d7d9d8b47b:eyJ0aW1lc3RhbXAiOjE2NjM4NDkwNjE5NDcsImZfa2IiOjAsImlwY19pZCI6W10sImluY19pZCI6WyJiMzEyMWZmM2FlNGQwNDU2ZmQwMDI2YTUzNjJjMjY5ZiJdfQ==; utag_main=v_id:018364b0e3f4001314def085217c0506d001606500bd0$_sn:1$_ss:0$_pn:23%3Bexp-session$_st:1663850864569$ses_id:1663841657844%3Bexp-session$free_trial:false$dc_visit:1$dc_event:24%3Bexp-session$dc_region:ap-east-1%3Bexp-session; bm_sv=F594272FBFDA970ADD8EF7B1B88DD629~YAAQZ8U8txZS22GDAQAAMuYhZRELQhQDFi5uMH/2zp9+VsYXi0Ajoa7XmnX/ccNwG5d1kIzyQt3pP/0lXgKgzFAk6icZgG6SmTIaTAhoLPsLHtlxWTTYdXkHFeTsMm0V2OO71WkxgjjnUOmBREOoTGRMSiGvLT06Ekn0agWPTWiLbfoz9BA5A/j39lg5NLjEXo0jovQAkeDBdai/mdTgAx6/h2PkzHu3gPT/+PBgVRo4ETvS5GJ7akA0HdAFdbkq~1; ti_rid=13e3642; _abck=73D9ACE1E904F6A0BB0164BA18B5C3E2~-1~YAAQZ8U8tzNS22GDAQAA1OYhZQgXu78fU4MwKONm0GsNW6XirzDPEKofuo4B0UxCXafv6PuRRNIM+/GpL3aKFUEPTeTILYmxbt8hVMHXxWaAuVyVbnawuJOUsjJxItYm4DkvA94tcTLKJ1okIX2AMsUlvRo12cIZm+TtT/tW/rYYZQHIXPJVLOBq5hF/E1ZqmZryLVJ9lmWp8C2btvZRANjkL/zNMus+JiIh6IqQf+rQko7S3peCv+9KC3u4zWBgP7MYY9HQJZBgvKRU1O8OgQFlyJoDBDApxAJrVRuSH1RvZj80PCSujR6h47j3mcf/g7dOpwasWLafGDmbq+kvyuGxzYID5jgUzVnmEL4qYyycP33Am8a/vwDhD8ovQWthlqBrC+PlcXncTqIvqej6MSDg9h1gElU1Yttm5b3NwPGXqSHkf2EThlZmqHqbtc8Caacuijb90Up3BS1UnMSQYm7q~-1~-1~-1"
    )

    headers = {
        "User-Agent": "PostmanRuntime",
        "Host": "www.ti.com.cn",
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        # NOTE(review): advertising "br" presumably requires a Brotli decoder
        # to be installed for the HTTP stack -- confirm, otherwise the body
        # may arrive still compressed.
        "Accept-Encoding": "gzip, deflate, br",
        "Cookie": cookie,
        "Referer": "https://www.ti.com.cn/zh-cn/amplifier-circuit/instrumentation/products.html"
    }

    # Without a timeout the call can block forever on an unresponsive server.
    res_html = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    res_html.raise_for_status()
    json_res = json.loads(res_html.content.decode("utf-8"))

    # List of product entries in the payload.
    products_list = json_res["ParametricResults"]

    # One row per product: name ("o3"), type ("o7"), svg ("o10"),
    # description ("p2192").
    return [
        [
            pro_dict.get("o3"),
            pro_dict.get("o7"),
            pro_dict.get("o10"),
            pro_dict.get("p2192"),
        ]
        for pro_dict in products_list
    ]


if __name__ == '__main__':
    # Query the product endpoint and echo the extracted rows.
    res_to_excel = get_pro(
        "https://www.ti.com.cn/selectiontool/paramdata/family/500/results?lang=cn&output=json"
    )
    print(res_to_excel)

    # Persist the rows to an Excel workbook (not CSV), without the index column.
    column_names = ["商品名称", "商品类型", "商品svg", "商品描述"]
    pd.DataFrame(data=res_to_excel, columns=column_names).to_excel(
        "./ti商品信息.xlsx", index=False
    )
示例2- 使用requests获取html数据, 并使用xpath解析
import requests
from lxml import etree
import pandas as pd


def get_pro(url, timeout=30):
    """Download an HTML catalogue page and extract product rows with XPath.

    Each ``<ul class="l02-zb">`` found under a ``<tbody>`` is treated as one
    product; the model, brand, package and description are read from its
    first four ``<li>`` children.

    Args:
        url: Address of the catalogue page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``[model, brand, package, description]`` rows. Entries
        missing any of the four fields are skipped.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36",
    }

    # Without a timeout the call can block forever on an unresponsive server.
    res_html = requests.get(url=url, headers=headers, timeout=timeout)

    html_str = res_html.content.decode("utf-8")

    # Parse the page and select every product entry.
    html = etree.HTML(html_str)
    pro_info = html.xpath("//tbody//ul[@class='l02-zb']")
    pro_res_list = []
    for pri_i in pro_info:
        try:
            pro_xinghao = pri_i.xpath("./li[1]/*/@title")[0]
            pro_pingpai = pri_i.xpath("./li[2]/a/text()")[0].strip()
            pro_fengzhuang = pri_i.xpath("./li[3]/*/@title")[0]
            pro_desc = pri_i.xpath("./li[4]/*/@title")[0]
        except IndexError:
            # An XPath query matched nothing, so this entry is incomplete.
            # Only IndexError is caught so that real bugs and Ctrl-C still
            # propagate.
            continue

        pro_res_list.append([pro_xinghao, pro_pingpai, pro_fengzhuang, pro_desc])
    return pro_res_list


if __name__ == '__main__':
    # Crawl catalogue pages 300..332 and gather every product row into one list.
    res_to_excel = [
        row
        for page_num in range(300, 333)
        for row in get_pro(f"https://list.szlcsc.com/catalog/{page_num}.html")
    ]
    print(res_to_excel)

    # Persist the rows to an Excel workbook (not CSV), without the index column.
    column_names = ["商品型号", "商品品牌", "商品封装", "商品描述"]
    pd.DataFrame(data=res_to_excel, columns=column_names).to_excel(
        "./立创商品信息.xlsx", index=False
    )
示例3- requests发送post请求
import requests


# NOTE: "posturl" is a placeholder; replace it with the real endpoint before
# running this example.
url = "posturl"

# Plain ASCII quotes are required here: with typographic quotes the whole
# line becomes one string and `headers` turns into a set instead of a dict.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36"
}

# Form fields to submit with the POST request.
form_data = {
    "from": "en",
    "to": "zh",
    "q": "lucky boy"
}


# Form fields go in the request body via `data=`; `params=` would put them
# in the URL query string instead. The timeout prevents hanging forever on
# an unresponsive server.
response = requests.post(url, data=form_data, headers=headers, timeout=30)
res = response.content.decode('utf-8')
示例4- 使用post请求添加参数
import requests
import urllib.parse


# Endpoint without a query string: the query parameters are supplied through
# `params` below, so keeping them in the URL as well would send them twice.
url = 'https://fanyi.baidu.com/v2transapi'


headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'
}

# Query-string parameters (appended to the URL by requests).
params = {
    "from": "zh",
    "to": "en"
}


# Form fields sent in the request body.
# NOTE(review): "sign" and "token" look like values captured from a browser
# session and are presumably tied to it -- confirm they are still valid.
data = {
    "from": "zh",
    "to": "en",
    "query": "你好",
    "transtype": "translang",
    "simple_means_flag": "3",
    "sign": "232427.485594",
    "token": "fa3f170535ad8b9d05540a6c20471a59",
    "domain": "common"
}

# Show what the URL-encoded body and query string look like. The dicts
# themselves are handed to requests, which encodes them and sets the form
# Content-Type header; a pre-encoded string body would lose that header.
print(urllib.parse.urlencode(data))
print(urllib.parse.urlencode(params))


# The timeout prevents hanging forever on an unresponsive server.
response = requests.post(url, params=params, data=data, headers=headers, timeout=30)
print(response.status_code)
# NOTE(review): the body is decoded as GBK as in the original example;
# confirm the encoding the endpoint actually uses.
print(response.content.decode('GBK'))
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值