这次我们要打开一个动漫网站,然后通过搜索查看里面有哪些动漫。
from selenium.webdriver import Firefox
from selenium.webdriver.common import by
import time
from selenium.webdriver.common.keys import Keys
import requests
from urllib.parse import quote # 可以将汉字转化为编码
import re
导入这些库
# Start a Firefox browser session controlled through Selenium.
web = Firefox()
# Load the site's landing page in that session.
web.get('https://www.omofun.top/?ref=lanrenao.com')
初始化,用 Selenium(自动化测试工具)进入这个网址
# Locate a single element by its absolute XPath and click it.
# NOTE(review): an absolute XPath is tied to the page's current DOM layout.
web.find_element(by.By.XPATH, '/html/body/div/div[1]/strong/div/ul/li[5]/a/span').click()
使用 Selenium 中的 By,通过 XPath 寻找目标元素并点击
# Convert the Chinese keyword to its URL (percent) encoding so that all
# matching anime can be crawled.
# NOTE: `thing` is not defined in this excerpt; it must already hold the
# search text when these lines run.
keyword = thing
keywords = quote(keyword) # encode
因为网址中的汉字需要编码,所以从 urllib.parse 导入 quote 函数,把汉字转化为 URL 编码
具体源码如下
# Interactive scraper: opens the site in Firefox, then repeatedly asks the
# user for a keyword.  The special keyword '今日更新' prints the entries
# matched on the page reached by clicking a navigation item; any other keyword
# is typed into the site's search box and the result pages 1-9 are fetched
# with `requests` and their <strong> titles printed.
web = Firefox()
try:
    web.get('https://www.omofun.top/?ref=lanrenao.com')
    time.sleep(5)  # fixed wait so the page can finish loading
    # Close the notice shown on the landing page.
    web.find_element(by.By.XPATH, '/html/body/div/div[5]/div/strong/div/div').click()

    # Loop-invariant values, built once instead of on every iteration.
    obj2 = re.compile(r'<a class="module-poster-item module-item">(?P<href>.*?)title="(?P<movies>.*?)">')
    obj = re.compile(r'<strong>(?P<movie>.*?)</strong>')
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76"
    }

    while True:
        thing = input('请输入你想看的动漫>>>')
        if thing == '今日更新':
            try:
                web.find_element(by.By.XPATH, '/html/body/div/div[1]/strong/div/ul/li[5]/a/span').click()
                web.switch_to.window(web.window_handles[-1])
                # Wait BEFORE reading the page source; sleeping after the
                # HTML has already been captured has no effect.
                time.sleep(5)
                todaytext = web.page_source
                items2 = obj2.findall(todaytext)
                for today in items2:
                    print(today)
            except Exception:
                # `Exception` rather than a bare `except`, so Ctrl-C and
                # SystemExit are no longer swallowed.
                print('404 系统出错了')
        else:
            web.find_element(by.By.XPATH, '/html/body/div/strong/div/div/div[2]/div/form/div[1]/input').send_keys(thing, Keys.ENTER)
            # Switch to the window that shows the finished search.
            web.switch_to.window(web.window_handles[-1])
            # Percent-encode the keyword so it can be embedded in the URL.
            keyword = thing
            keywords = quote(keyword)
            for i in range(1, 10):
                urls = 'https://www.omofun.top/index.php/vod/search/page/' + str(i) + '/wd/' + keywords + '.html'
                try:
                    # `headers` must be passed by keyword: the second
                    # positional parameter of requests.get() is `params`,
                    # which would send the dict as a query string instead.
                    respon = requests.get(urls, headers=headers, timeout=10)
                except requests.RequestException as exc:
                    print('请求失败:', exc)
                    break
                text = respon.text
                items = obj.findall(text)
                # The first three <strong> matches are skipped — presumably
                # page chrome rather than titles; verify against the markup.
                item = items[3:]
                for f in item:
                    print(f)
                print('共有'+str(len(item))+'部动漫在这页')
        repeat = input("是否要继续查询?(输入y/n)")
        if repeat != 'y':
            break
    print('感谢你的查询!')
finally:
    # Always close the browser, even when an error aborts the session.
    web.quit()