import time
from requests_html import HTMLSession
from fake_useragent import UserAgent as ua
from tomorrow import threads
headers = {"User-Agent": ua().Chrome}
session = HTMLSession()
url_list = ["https://movie.douban.com",
"http://www.1905.com/",
"http://www.mtime.com/",
"https://www.dy2018.com/",
"http://dytt8.net",
"https://www.piaohua.com/",
"http://maoyan.com",
"https://www.xigua110.com/",
"https://www.vmovier.com/",
"http://movie.kankan.com/",
"https://107cine.com/",
"http://movie.youku.com",
"http://film.qq.com","https://dianying.taobao.com/",
"http://www.wandafilm.com/",
"http://www.dygang.net/","http://dianying.2345.com/",
]
def get_req(url, timeout=10):
req = session.get(url, headers=headers, timeout=timeout)
if req.status_code == 200:
return req
@threads(5)
def async_get_req(url, timeout=10):
req = session.get(url, headers=headers, timeout=timeout)
if req.status_code == 200:
return req
def get_xpath(req, xpath_str):
return req.html.xpath(xpath_str)[0].strip().replace("\n", "")
start=time.time()
for i in url_list:
print(get_xpath(get_req(i),"//title//text()"))
end=time.time()
print("普通方式花费时间",end-start)
start2 = time.time()
req_list = []
for url in url_list:
req = async_get_req(url)
req_list.append(req)
for req in req_list:
print(get_xpath(req, "//title//text()"))
end2 = time.time()
print("并发后花费时间", end2 - start2)