searchpy.py
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.support.ui import WebDriverWait
import time,json,threading
# Proxy address to hand to Firefox.
proxy = dict(host='10.10.10.10', port=8888)

# Firefox profile carrying the proxy preferences. Note that driver_init
# currently starts Firefox without passing this profile.
profile = FirefoxProfile()
profile.set_preference('network.proxy.type', 1)
# HTTP and SSL traffic are pointed at the same proxy host and port.
profile.set_preference('network.proxy.http', proxy['host'])
profile.set_preference('network.proxy.http_port', proxy['port'])
profile.set_preference('network.proxy.ssl', proxy['host'])
profile.set_preference('network.proxy.ssl_port', proxy['port'])
def driver_init(search_name):
    """Start Firefox, open Baidu and submit a search for ``search_name``.

    The text is typed into the element with id "kw" and the element with
    id "su" is clicked. Returns the WebDriver instance that was created.
    """
    # The proxy is not enabled here; construct the driver with the
    # module-level ``profile`` (webdriver.Firefox(profile)) to enable it.
    browser = webdriver.Firefox()
    browser.get("http://www.baidu.com")
    browser.implicitly_wait(30)

    search_box = browser.find_element_by_xpath("//*[@id=\"kw\"]")
    search_box.send_keys(search_name)

    submit_button = browser.find_element_by_xpath("//*[@id=\"su\"]")
    submit_button.click()
    return browser
class roll_localpage():
    """Page through Baidu results and click the result matching a domain.

    The instance owns one WebDriver session. ``run`` repeatedly scans the
    current result page for an entry whose "c-showurl" text contains
    ``domane``; on a hit it clicks the result and resets the browser to a
    fresh search, otherwise it advances to the next result page.
    """

    # Upper bound on the page counter used by run() before it stops.
    MAX_PAGES = 150

    def __init__(self, driver, domane, search_name):
        """Store the driver, the domain to look for and the search text.

        The parameter spelling "domane" is kept as-is because callers pass
        it under that name.
        """
        self.domane = domane
        self.search_name = search_name
        self.driver = driver

    def find_def(self, page):
        """Click the first result on the current page that matches the domain.

        ``page`` is the zero-based page counter; the container id of the
        n-th entry is computed as ``page * 10 + n`` (this presumably relies
        on ten results per page -- TODO confirm).

        Returns 0 after clicking a matching result, 1 when nothing matched.
        """
        shown_urls = self.driver.find_elements_by_class_name("c-showurl")
        for position, shown_url in enumerate(shown_urls, start=1):
            if self.domane not in shown_url.text:
                continue
            result_id = str(page * 10 + position)
            time.sleep(1)
            # Click the title link inside the result container with that id.
            self.driver.find_element_by_xpath(
                "/html/body/div[1]/div[5]/div[1]/div[3]/div[@id=\"" + result_id + "\"]/h3/a").click()
            # Stay on the opened page for a while before returning.
            time.sleep(15)
            return 0
        return 1

    def timeouts(self):
        """Pause briefly, then wait until an element with class "fk" exists.

        Sets the implicit wait to 30 seconds and uses an explicit
        WebDriverWait of 10 seconds; WebDriverWait.until raises if the
        condition never becomes truthy.
        """
        time.sleep(1)
        self.driver.implicitly_wait(30)
        WebDriverWait(self.driver, 10).until(
            lambda driver: driver.find_elements_by_class_name("fk"))

    def roll_page(self, page):
        """Click the pager entry labelled ``page + 1``.

        Elements with class "pc" whose text is not an integer are skipped
        instead of aborting the scan with ValueError.

        Returns 0 when the pager entry was clicked, 1 when it was not found.
        """
        self.driver.implicitly_wait(10)
        wanted = int(page + 1)
        for pager_item in self.driver.find_elements_by_class_name("pc"):
            try:
                number = int(pager_item.text)
            except ValueError:
                continue
            if number == wanted:
                pager_item.click()
                self.driver.switch_to.window(self.driver.window_handles[0])
                return 0
        # Paging failed.
        return 1

    def run(self):
        """Scan result pages until MAX_PAGES consecutive pages had no match.

        After every successful click the browser is reset through
        driver_check() and the page counter starts again from 0.
        """
        page = 0
        while page < self.MAX_PAGES:
            self.timeouts()
            print(page)
            if self.find_def(page) == 0:
                self.driver_check()
                page = 0
            else:
                page += 1
                self.timeouts()
                print(self.roll_page(page))

    def driver_check(self):
        """Reset the browser to a single window showing a fresh search.

        Deletes all cookies, closes every window except the last handle,
        reloads Baidu and submits ``search_name`` again. If any step
        raises, the driver is discarded and replaced with a new one from
        driver_init().
        """
        try:
            self.driver.delete_all_cookies()
            handles = self.driver.window_handles
            print(handles)
            for index, handle in enumerate(handles[:-1]):
                print(index)
                self.driver.switch_to.window(handle)
                self.driver.close()
            self.driver.switch_to.window(self.driver.window_handles[0])
            self.driver.get("http://www.baidu.com")
            time.sleep(2)
            self.driver.find_element_by_xpath("//*[@id=\"kw\"]").send_keys(self.search_name)
            self.driver.find_element_by_xpath("//*[@id=\"su\"]").click()
            self.timeouts()
        except Exception as driver_error:
            # Any failure here means the session is unusable; start over.
            print("driver_check failed, restarting browser:", repr(driver_error))
            self._restart_driver()

    def _restart_driver(self):
        """Quit the current driver (best effort) and create a new one."""
        try:
            self.driver.quit()
        except Exception as quit_error:
            # The session may already be gone; a replacement is still needed.
            print("driver quit failed:", repr(quit_error))
        self.driver = driver_init(self.search_name)
def run_web(check_json):
    """Start one background search worker for a single job entry.

    ``check_json`` is a mapping read with the keys 'search' and 'domane'.
    A browser is opened through driver_init(), wrapped in a roll_localpage
    instance, and that instance's run() is started on a new thread.
    Always returns 0.
    """
    keyword = check_json['search']
    worker = roll_localpage(driver_init(keyword), check_json['domane'], keyword)
    threading.Thread(target=worker.run).start()
    return 0
# Load the job list from check.json and start one worker per entry.
with open("./check.json", "r", encoding='utf-8') as fs:
    check_json = json.load(fs)
for entry in check_json:
    run_web(entry)

# Block until the worker threads finish. Joining replaces the former
# `while True: pass` spin, which kept a CPU core busy and competed with
# the workers for the interpreter lock.
for worker_thread in threading.enumerate():
    if worker_thread is not threading.current_thread():
        worker_thread.join()
check.json
[
{"domane": "要检查的域名", "search": "百度输入框输入搜索的内容"},
{"domane": "要检查的域名", "search": "百度输入框输入搜索的内容"}
]