我们要操作的网站是https://login2.scrape.center/
需要的库
import requests
from bs4 import BeautifulSoup
import redis
import re
准备工作
# Root page of the target site (fetched after logging in).
base_url = "https://login2.scrape.center/"
# Endpoint that the login form is posted to.
login_url = "https://login2.scrape.center/login"
请求头
# Request headers: a desktop-browser User-Agent string.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
}
登录表单
# Form fields submitted with the login request.
data = {
    "username": "admin",
    "password": "admin",
}
连接redis数据库
# Redis client for the local server on port 6379, database index 1, no password.
r = redis.Redis(
    host="127.0.0.1",
    port=6379,
    password=None,
    db=1,
)
创建一个session对象
# A requests Session persists cookies across the requests made through it,
# so a cookie set by the login response is sent again on later requests.
session = requests.Session()
第一次请求:目的是获取cookie
# Submit the login form through the session so any cookie the server sets is
# kept on it. Without a timeout requests waits indefinitely on a stalled
# server, and raise_for_status() turns a 4xx/5xx answer into an exception
# instead of letting the script carry on as if the login had worked.
login_response = session.post(url=login_url, data=data, headers=headers, timeout=10)
login_response.raise_for_status()
第二次请求:目的是获取数据
# Fetch the listing page through the same session; the timeout prevents an
# indefinite wait on a stalled server.
response = session.get(url=base_url, headers=headers, timeout=10)
# Stop on a 4xx/5xx response rather than parsing an error page.
response.raise_for_status()
# Response body as text.
text = response.text
# Parse the HTML with the lxml parser.
soup = BeautifulSoup(text, 'lxml')
# Select every div that carries all four card classes. A CSS selector matches
# them in any order, whereas a pattern on the joined class string only matches
# one exact ordering.
div_ls = soup.select("div.el-card.item.m-t.is-hover-shadow")
遍历div
# Walk over every card and store its title -> score pair in Redis.
for div in div_ls:
    # Title heading and score paragraph of this card.
    title_tag = div.find(name='h2', attrs={'class': "m-b-sm"})
    score_tag = div.select_one("p.score.m-t-md.m-b-n-sm")
    # find()/select_one() return None when nothing matches; skip such a card
    # instead of crashing with an AttributeError on None.
    if title_tag is None or score_tag is None:
        continue
    # strip=True drops the whitespace around the text so that the stored
    # key and value are clean.
    title = title_tag.get_text(strip=True)
    score = score_tag.get_text(strip=True)
    # Persist the pair: the title is the key, the score is the value.
    r.set(name=title, value=score)
    # Progress message for this card.
    print("{}已获取".format(title))
完整代码
import requests
from bs4 import BeautifulSoup
import redis
import re
# Complete script: log in to the site, fetch the listing page, and store each
# card's title -> score pair in Redis.

# Root page of the target site and the endpoint the login form is posted to.
base_url = 'https://login2.scrape.center/'
login_url = 'https://login2.scrape.center/login'

# Request headers: a desktop-browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
}

# Form fields submitted with the login request.
data = {
    'username': 'admin',
    'password': 'admin',
}

# Redis client for the local server on port 6379, database index 1, no password.
r = redis.Redis(host="127.0.0.1", port=6379, password=None, db=1)

# The session keeps cookies between requests, so a cookie set by the login
# response is sent again with the page request.
session = requests.Session()

# Log in. The timeout prevents an indefinite wait on a stalled server, and
# raise_for_status() raises on a 4xx/5xx response instead of continuing.
login_response = session.post(url=login_url, data=data, headers=headers, timeout=10)
login_response.raise_for_status()

# Fetch the listing page through the same session.
response = session.get(url=base_url, headers=headers, timeout=10)
response.raise_for_status()
text = response.text

# Parse the HTML with the lxml parser.
soup = BeautifulSoup(text, 'lxml')

# Select every div that carries all four card classes, in any order.
div_ls = soup.select("div.el-card.item.m-t.is-hover-shadow")

for div in div_ls:
    # Title heading and score paragraph of this card.
    title_tag = div.find(name='h2', attrs={'class': "m-b-sm"})
    score_tag = div.select_one("p.score.m-t-md.m-b-n-sm")
    # find()/select_one() return None when nothing matches; skip such a card
    # instead of crashing with an AttributeError on None.
    if title_tag is None or score_tag is None:
        continue
    # strip=True drops the whitespace around the text so that the stored
    # key and value are clean.
    title = title_tag.get_text(strip=True)
    score = score_tag.get_text(strip=True)
    # Persist the pair: the title is the key, the score is the value.
    r.set(name=title, value=score)
    # Progress message for this card.
    print("{}已获取".format(title))

# Printed once, after all cards have been processed.
print("结束")