# Example
import requests
import parsel
import time
import os
start = time.time() # record the program start time
# Outline of the script, as listed by the author in the string literal below
# (the bare string is never assigned or used, so it has no runtime effect):
#   1. define the URL and forge the request headers
#   2. request the data
#   3. parse the data
#   4. extract the data
#   5. persist (save) the results
"""
1,定义url,伪造headers
2,请求数据
3,解析数据
4,提取数据
5,持久化保存
"""
# Make sure the "./image" directory exists (presumably the download target
# for the images handled further down -- confirm against the rest of the script).
# Attempting the creation and catching FileExistsError avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
try:
    os.mkdir("./image")
except FileExistsError:
    # Something already exists at ./image; nothing to create or report.
    pass
else:
    # Only announce success when the directory was actually created.
    print("创建文件image成功")
# Step 1: target URL and request headers.
url = "https://fm.qq.com/category/39087_38979"
headers = {
    # Browser-like User-Agent string sent with the request.
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
}
# Step 2: request the page.
# The timeout keeps the script from blocking forever on a stalled connection,
# and raise_for_status() stops on an HTTP error instead of parsing an error page.
http_response = requests.get(url=url, headers=headers, timeout=10)
http_response.raise_for_status()
response = http_response.text  # response body as text; name kept for later code
# Step 3: parse the page.
sel = parsel.Selector(response)
# All nodes matching ".album-list .item" except the last one.
lis = sel.css('.album-list .item')[0:-1]
count = 0  # counter initialised here; its use lies outside this section
for li in lis:
time.sleep(0.5)
# 二次解析
img_url = li.css("img::attr(src)").get() # 图片url
title = li.c