1.爬取某电影网站的电影名称
下面展示代码:
import requests
from lxml import etree
def dianying(number):
    """Fetch the movie titles from one list page of piaku.cc and append them to d.xlsx.

    NOTE(review): the output is plain UTF-8 text despite the .xlsx extension --
    Excel will not open it as a real workbook; the name is kept for
    backward compatibility with the original script.

    :param number: 1-based page number of the movie list to scrape.
    """
    url = 'https://www.piaku.cc/p/DY-' + str(number) + '/'
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
    }
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(response.text)
    names = html.xpath('//div[@class="li-bottom"]/h3/a/text()')
    # Open the output file once per page instead of reopening it inside the
    # loop for every single title (the original reopened it per name).
    with open('d.xlsx', 'a+', encoding='utf-8') as f:
        for title in names:
            print('正在下载:' + title)
            f.write(title + '\n')
# Crawl list pages 1 through 20; each call appends one page of titles.
for page in range(1, 21):
    dianying(page)
    print(f'------------------------第{page}页下载完成------------------------')
下面展示效果图:
2.批量获取多个页面的电影照片
下面展示代码:
import os
import re
from time import sleep
import requests
from lxml import etree
def piaku_photo(number):
    """Download the poster images from one list page of piaku.cc.

    Images are saved as <title>.jpg inside the local folder '片库照片',
    which is created on first use.

    :param number: 1-based page number of the movie list to scrape.
    """
    url = 'https://www.piaku.cc/p/DY-' + str(number) + '/'
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
    }
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(response.text)
    names = html.xpath('//div[@class="li-img"]/a/img/@alt')
    photos = html.xpath('//div[@class="li-img"]/a/img/@src')
    path = '片库照片'
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists / os.mkdir pair.
    os.makedirs(path, exist_ok=True)
    for title, src in zip(names, photos):
        # Some src attributes are placeholders rather than absolute URLs; skip them.
        if not src.startswith('http'):
            continue
        # Movie titles may contain characters that are illegal in file names
        # (e.g. ':' or '?' on Windows) -- replace them so open() cannot fail.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', title) + '.jpg'
        image = requests.get(src, headers=headers, timeout=10)
        # Throttle between downloads. The original slept while the output
        # file handle was already open, which served no purpose.
        sleep(0.5)
        with open(os.path.join(path, safe_name), 'wb') as f:
            f.write(image.content)
        print(title + ":" + src + '----------下载成功')
# Batch-download the posters from list pages 1 and 2.
for page in range(1, 3):
    piaku_photo(page)
    print(f'------------------------第{page}页下载完成------------------------')
最终效果展示: