from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import requests
import bs4
from lxml import etree
import time
import os
import random
def set_option():
    """Build the Chrome options used by the subtitle scraper.

    The ``profile.default_content_setting_values`` preference is set to 2
    for both images and JavaScript (presumably Chrome's "block" value, so
    pages load without either -- confirm against the Chrome preference docs).

    Returns:
        webdriver.ChromeOptions: the configured options object.
    """
    prefs = {'profile.default_content_setting_values': {'images': 2, 'javascript': 2}}
    chrome_options = webdriver.ChromeOptions()
    # Apply the Chrome preferences.
    chrome_options.add_experimental_option('prefs', prefs)
    # Optional: uncomment to run without showing a browser window.
    # chrome_options.add_argument('--headless')
    # chrome_options.add_argument('--disable-gpu')
    return chrome_options
# Entry page of the subtitle site; also the prefix for the scraped download href.
_SITE_URL = "http://www.zimuku.la/"
# Directory the original script saved into; still the default for get_data().
_DEFAULT_SAVE_DIR = "D:\\python_study\\subtitles\\"
# Location of the download link's href on the final download page.
_DOWNLOAD_LINK_XPATH = "//html/body/main/div/div/div/table/tbody/tr/td[1]/div/ul/li[6]/a/@href"
# Seconds to wait on each HTTP request so a stalled server cannot hang the script.
_REQUEST_TIMEOUT = 30
# User-Agent strings; one is chosen at random for the download request.
_USER_AGENTS = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
    'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
)


def _build_headers():
    """Return one request-header dict per entry in ``_USER_AGENTS``."""
    return [
        {
            'Host': 'www.zimuku.la',
            'User-Agent': agent,
            'Referer': 'http://www.zimuku.la/dld/117414.html',
        }
        for agent in _USER_AGENTS
    ]


def _switch_to_new_window(web, known_handles):
    """Switch *web* to the first window handle that is not in *known_handles*."""
    fresh = [handle for handle in web.window_handles if handle not in known_handles]
    if not fresh:
        raise RuntimeError("expected a new browser window to open, but none did")
    web.switch_to.window(fresh[0])


def _locate_download_href(movie_name):
    """Drive Chrome through the site search and return the scraped download href."""
    web = webdriver.Chrome(options=set_option())
    try:
        web.get(_SITE_URL)
        # NOTE(review): the find_element_by_* helpers are kept because they
        # match the Selenium version this script was run with; newer Selenium
        # releases drop them in favour of find_element(By.<kind>, value).
        search_box = web.find_element_by_name("q")
        search_box.send_keys(movie_name)
        search_box.send_keys(Keys.ENTER)

        result_table = web.find_element_by_tag_name("tbody")
        # Keep searching inside the element that was just found.
        first_link = result_table.find_element_by_tag_name("a")
        known = set(web.window_handles)
        first_link.click()  # simulate a click on the first search result
        # Pick the handle that appeared after the click instead of relying on
        # the position of the new window inside window_handles.
        _switch_to_new_window(web, known)

        download_button = web.find_element_by_id("down1")
        known = set(web.window_handles)
        download_button.click()
        _switch_to_new_window(web, known)

        tree = etree.HTML(web.page_source)
        hrefs = tree.xpath(_DOWNLOAD_LINK_XPATH)
    finally:
        # Always shut the browser down, even when a lookup above fails.
        web.quit()

    if not hrefs:
        raise RuntimeError("download link not found on the download page")
    return hrefs[0]


def _download_subtitle(end_url, save_dir):
    """Follow the download redirect at *end_url* and save the file into *save_dir*."""
    headers = _build_headers()
    i = random.randrange(len(headers))
    print(i)
    # The first request is only used for its redirect target, so redirects
    # are not followed automatically.
    content = requests.get(
        end_url, headers=headers[i], allow_redirects=False, timeout=_REQUEST_TIMEOUT
    )
    location = content.headers.get("Location")
    if not location:
        raise RuntimeError("download request did not return a redirect Location")

    file = requests.get(location, timeout=_REQUEST_TIMEOUT)
    disposition = file.headers.get("Content-Disposition")
    if not disposition or 'filename=' not in disposition:
        raise RuntimeError("response carries no filename in Content-Disposition")
    file_name = disposition.split('filename=')[1].strip('"')
    print(file_name)

    # The name comes from the server: drop any directory part so it cannot
    # escape save_dir.
    safe_name = os.path.basename(file_name.replace("\\", "/"))
    if not safe_name:
        raise RuntimeError("server supplied an empty file name")

    os.makedirs(save_dir, exist_ok=True)
    # Write the raw bytes for every file type. Text mode would re-encode the
    # subtitle with the locale codec and rewrite its line endings.
    with open(os.path.join(save_dir, safe_name), "wb") as f:
        f.write(file.content)


def get_data(movie_name, save_dir=_DEFAULT_SAVE_DIR):
    """Search the subtitle site for *movie_name* and download the first result.

    Chrome is driven through the search page, the first result and its
    download page; the download link scraped from that page is then fetched
    with ``requests`` and the file is written, byte for byte, into *save_dir*
    under the name given in the response's Content-Disposition header.
    The chosen header index and the file name are printed.

    Args:
        movie_name: Text typed into the site's search box.
        save_dir: Directory to save into; created if it does not exist.

    Raises:
        RuntimeError: If an expected window, link, redirect or file name is
            missing.
    """
    href = _locate_download_href(movie_name)
    end_url = _SITE_URL + href
    _download_subtitle(end_url, save_dir)


if __name__ == "__main__":
    movie_name = input()
    get_data(movie_name)
钢铁侠1
c:\users\mei\appdata\local\programs\python\python37-32\lib\site-packages\ipykernel_launcher.py:55: DeprecationWarning: use options instead of chrome_options
c:\users\mei\appdata\local\programs\python\python37-32\lib\site-packages\ipykernel_launcher.py:67: DeprecationWarning: use driver.switch_to.window instead
c:\users\mei\appdata\local\programs\python\python37-32\lib\site-packages\ipykernel_launcher.py:73: DeprecationWarning: use driver.switch_to.window instead
5
[zmk.pw]Iron.Man.2008.1080p.BluRay.x264.DTS-FGT.rar