一、抽屉
-
爬取数据(携带请求头)
import requests from bs4 import BeautifulSoup r1 = requests.get( url='https://dig.chouti.com/', headers={ 'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' } ) soup = BeautifulSoup(r1.text,'html.parser') # 标签对象 content_list = soup.find(name='div',id='content-list') # [标签对象,标签对象] item_list = content_list.find_all(name='div',attrs={'class':'item'}) for item in item_list: a = item.find(name='a',attrs={'class':'show-content color-chag'}) a.text.strip() print(a.text)
-
点赞
import requests # 1. 查看首页 r1 = requests.get( url='https://dig.chouti.com/', headers={ 'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' } ) # 2. 提交用户名和密码 r2 = requests.post( url='https://dig.chouti.com/login', headers={ 'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' }, data={ 'phone':'8613121758648', 'password':'woshiniba', 'oneMonth':1 }, cookies=r1.cookies.get_dict() # 获取刚进首页时的cookie ) # 3. 点赞 r3 = requests.post( url='https://dig.chouti.com/link/vote?linksId=20435396', headers={ 'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' }, cookies=r1.cookies.get_dict() ) print(r3.text)
二、GitHub
# 1. 访问登陆页面,获取 authenticity_token
i1 = requests.get('https://github.com/login')
soup1 = BeautifulSoup(i1.text, features='lxml')
tag = soup1.find(name='input', attrs={'name': 'authenticity_token'}) # 获取csrf_token
authenticity_token = tag.get('value')
c1 = i1.cookies.get_dict()
i1.close()
# 1. 携带authenticity_token和用户名密码等信息,发送用户验证
form_data = {
"authenticity_token": authenticity_token,
"utf8": "",
"commit": "Sign in",
"login": "wupeiqi@live.com",
'password': 'xxoo'
}
i2 = requests.post('https://github.com/session', data=form_data, cookies=c1)
c2 = i2.cookies.get_dict()
c1.update(c2) # 将c1内容字典换成c2内容
i3 = requests.get('https://github.com/settings/repositories', cookies=c1)
soup3 = BeautifulSoup(i3.text, features='lxml')
list_group = soup3.find(name='div', class_='listgroup')
from bs4.element import Tag
for child in list_group.children:
if isinstance(child, Tag):
project_tag = child.find(name='a', class_='mr-1')
size_tag = child.find(name='small')
temp = "项目:%s(%s); 项目路径:%s" % (project_tag.get('href'), size_tag.string, project_tag.string, )
print(temp)
三、拉钩
import re
import requests
r1 = requests.get(
url='https://passport.lagou.com/login/login.html',
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}
)
X_Anti_Forge_Token = re.findall("X_Anti_Forge_Token = '(.*?)'", r1.text, re.S)[0]
X_Anti_Forge_Code = re.findall("X_Anti_Forge_Code = '(.*?)'", r1.text, re.S)[0]
r2 = requests.post(
url='https://passport.lagou.com/login/login.json',
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
'X-Anit-Forge-Code':X_Anti_Forge_Code,
'X-Anit-Forge-Token':X_Anti_Forge_Token,
'Referer': 'https://passport.lagou.com/login/login.html', # 上一次请求地址是什么?
},
data={
"isValidate": True,
'username': '15131255089',
'password': 'ab18d270d7126ea65915c50288c22c0d',
'request_form_verifyCode': '',
'submit': ''
},
cookies=r1.cookies.get_dict()
)
print(r2.text)