python笔记(爬虫 抽屉、GitHub、拉钩)

一、抽屉

  1. 爬取数据(携带请求头)

    import requests
    from bs4 import BeautifulSoup
    
    r1 = requests.get(
        url='https://dig.chouti.com/',
        headers={
            'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
        }
    )
    
    soup = BeautifulSoup(r1.text,'html.parser')
    
    # 标签对象
    content_list = soup.find(name='div',id='content-list')
    # [标签对象,标签对象]
    item_list = content_list.find_all(name='div',attrs={'class':'item'})
    for item in item_list:
        a = item.find(name='a',attrs={'class':'show-content color-chag'})
        a.text.strip()
        print(a.text)
    
  2. 点赞

    import requests
    # 1. 查看首页
    r1 = requests.get(
        url='https://dig.chouti.com/',
        headers={
            'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
        }
    )
    
    # 2. 提交用户名和密码
    r2 = requests.post(
        url='https://dig.chouti.com/login',
        headers={
            'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
        },
        data={
            'phone':'8613121758648',
            'password':'woshiniba',
            'oneMonth':1
        },
        cookies=r1.cookies.get_dict()    # 获取刚进首页时的cookie
    )
    
    
    # 3. 点赞
    r3 = requests.post(
        url='https://dig.chouti.com/link/vote?linksId=20435396',
        headers={
            'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
        },
        cookies=r1.cookies.get_dict()
    )
    print(r3.text)
    

二、GitHub

# 1. 访问登陆页面,获取 authenticity_token
i1 = requests.get('https://github.com/login')
soup1 = BeautifulSoup(i1.text, features='lxml')
tag = soup1.find(name='input', attrs={'name': 'authenticity_token'})  # 获取csrf_token
authenticity_token = tag.get('value')
c1 = i1.cookies.get_dict()
i1.close()

# 1. 携带authenticity_token和用户名密码等信息,发送用户验证
form_data = {
"authenticity_token": authenticity_token,
    "utf8": "",
    "commit": "Sign in",
    "login": "wupeiqi@live.com",
    'password': 'xxoo'
}

i2 = requests.post('https://github.com/session', data=form_data, cookies=c1)
c2 = i2.cookies.get_dict()
c1.update(c2)  # 将c1内容字典换成c2内容
i3 = requests.get('https://github.com/settings/repositories', cookies=c1)

soup3 = BeautifulSoup(i3.text, features='lxml')
list_group = soup3.find(name='div', class_='listgroup')

from bs4.element import Tag

for child in list_group.children:
    if isinstance(child, Tag):
        project_tag = child.find(name='a', class_='mr-1')
        size_tag = child.find(name='small')
        temp = "项目:%s(%s); 项目路径:%s" % (project_tag.get('href'), size_tag.string, project_tag.string, )
        print(temp)

三、拉钩

import re
import requests

r1 = requests.get(
    url='https://passport.lagou.com/login/login.html',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
)
X_Anti_Forge_Token = re.findall("X_Anti_Forge_Token = '(.*?)'", r1.text, re.S)[0]
X_Anti_Forge_Code = re.findall("X_Anti_Forge_Code = '(.*?)'", r1.text, re.S)[0]

r2 = requests.post(
    url='https://passport.lagou.com/login/login.json',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'X-Anit-Forge-Code':X_Anti_Forge_Code,
        'X-Anit-Forge-Token':X_Anti_Forge_Token,
        'Referer': 'https://passport.lagou.com/login/login.html', # 上一次请求地址是什么?
    },
    data={
        "isValidate": True,
        'username': '15131255089',
        'password': 'ab18d270d7126ea65915c50288c22c0d',
        'request_form_verifyCode': '',
        'submit': ''
    },
    cookies=r1.cookies.get_dict()
)
print(r2.text)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值