Common usage examples of the requests library

Example 1: logging in to GitHub with requests.session

Example 2: submitting data with POST

Example 3: accessing a web page through a proxy

Example 4: a crawler that scrapes Tieba post titles and links

1. Logging in to GitHub with requests.session

import requests
import re

def dologin():
    session = requests.session()
    session.headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }

    # Fetch the login page and extract the CSRF token from the form
    url1 = "https://github.com/login"
    res1 = session.get(url1).content.decode()

    token = re.findall('name="authenticity_token" value="(.*?)" />', res1)[0]
    print(token)

    # Post the credentials; the session object keeps the cookies
    url2 = "https://github.com/session"
    data = {
        "commit": "Sign in",
        "authenticity_token": token,
        "login": "username@qq.com",  # account
        "password": "userpass",      # password
        "webauthn-support": "support",
    }

    session.post(url2, data=data)

    # Request a page that requires login to verify the session works
    url3 = "https://github.com/settings/profile"
    response = session.get(url3)
    with open("login.html", "wb") as f:
        f.write(response.content)


if __name__ == '__main__':
    dologin()
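
Writing login.html to disk makes it easy to eyeball the result, but a small check can confirm the login programmatically. A minimal sketch, assuming the saved profile page contains a marker such as the "Public profile" heading only when the session is authenticated (the marker string is an assumption about GitHub's markup, not a guarantee):

def check_login(html_path="login.html", marker="Public profile"):
    # Assumption: this marker appears on the settings page only when
    # logged in; adjust it if GitHub's markup differs.
    with open(html_path, encoding="utf-8") as f:
        page = f.read()
    return marker in page

print(check_login())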

2. Submitting data with POST

import requests


class Iciba(object):
    """Submit a word to the iciba translation API via POST."""

    def __init__(self, word):
        self.url = "http://fy.iciba.com/ajax.php?a=fy"
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
        }
        self.data = {
            "f": "auto",  # source language: auto-detect
            "t": "auto",  # target language: auto-detect
            "w": word,    # the word or phrase to translate
        }

    def get_data(self):
        response = requests.post(self.url, data=self.data, headers=self.header)
        return response.content

    def run(self):
        response = self.get_data()
        print(response.decode())


if __name__ == '__main__':
    bd = Iciba("人们")
    bd.run()
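
run() prints the raw JSON bytes; in practice you would parse them. A hedged sketch, assuming the historical iciba response shape of {"status": 1, "content": {"out": "<translation>"}} — verify the field names against a live response before relying on them:

import json

def extract_translation(raw_bytes):
    # Field names ("content", "out") are assumptions based on older
    # iciba responses and may have changed.
    payload = json.loads(raw_bytes.decode())
    return payload.get("content", {}).get("out", "")

# Usage: print(extract_translation(Iciba("人们").get_data()))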

3. Using a proxy

import requests

url = "https://www.ip138.com/"
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
}

# Both schemes must be mapped: without an "https" entry, requests
# would bypass the proxy for an https:// URL
proxy = {
    "http": "http://58.243.28.155:4245",
    "https": "http://58.243.28.155:4245",
}

response = requests.get(url, headers=header, proxies=proxy)
response.encoding = 'gbk'

print(response.text)
with open("ip138.html", "wb") as f:
    f.write(response.content)
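
Free proxies go stale quickly, so in practice the request should carry a timeout and handle failures. A minimal sketch along those lines, reusing the example's placeholder proxy address (substitute a live proxy of your own):

import requests

proxies = {
    "http": "http://58.243.28.155:4245",
    "https": "http://58.243.28.155:4245",
}

try:
    resp = requests.get("https://www.ip138.com/", proxies=proxies, timeout=10)
    resp.raise_for_status()  # turn 4xx/5xx responses into exceptions
    print(resp.status_code)
except requests.RequestException as exc:
    # covers connection errors, timeouts and bad HTTP statuses alike
    print(f"proxy request failed: {exc}")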

4. A crawler that scrapes Tieba post titles and links

import requests
from lxml import etree


class Tieba(object):
    def __init__(self, name):
        self.url = "https://tieba.baidu.com/f?kw=" + name
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
        }

    def getdata(self, url):
        rsp = requests.get(url, headers=self.header).content.decode("utf-8")
        return rsp

    def parse_data(self, data):
        # Tieba ships the post list inside HTML comments; strip the
        # comment markers so lxml can see the markup
        data = data.replace("<!--", "").replace("-->", "")
        html = etree.HTML(data)

        # Each post title is an <a> with class "j_th_tit " (trailing space intended)
        a = html.xpath('//*[@class="j_th_tit "]')
        url_list = []
        for el in a:
            tmp = {}
            tmp['title'] = el.xpath("./text()")[0]
            tmp['link'] = 'https://tieba.baidu.com' + el.xpath("./@href")[0]
            url_list.append(tmp)
        print(url_list)

        # The "next page" link is absent on the last page
        try:
            nexturl = 'https:' + html.xpath('//a[@class="next pagination-item "]/@href')[0]
        except IndexError:
            nexturl = None
        return url_list, nexturl

    def run(self):
        nexturl = self.url
        while nexturl is not None:
            data = self.getdata(nexturl)
            url_list, nexturl = self.parse_data(data)


if __name__ == '__main__':
    tb = Tieba("红名")
    tb.run()
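
run() discards each page's results after printing them. A small variant that accumulates everything and writes it to a JSON file, pausing between fetches (the output path and delay are arbitrary choices, not part of the original example):

import json
import time

def run_and_save(spider, outfile="tieba.json", delay=1.0):
    # Walk the pages exactly as Tieba.run() does, but keep the results
    results = []
    nexturl = spider.url
    while nexturl is not None:
        data = spider.getdata(nexturl)
        url_list, nexturl = spider.parse_data(data)
        results.extend(url_list)
        time.sleep(delay)  # be polite between page fetches
    with open(outfile, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

# Usage: run_and_save(Tieba("红名"))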

 
