升级版加入指定时间段操作功能,例如程序只在上午 8:30 到 11:30、下午 14:00 到 17:00、晚上 19:00 到 22:00 这几个时间段内运行
项目1.0:
循环_file:
文件命名:循环_file
from selenium import webdriver
import time
import random
import requests
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
import json
class Baidusearch():
def __init__(self):
pass
def agent(self):
    """Fetch one proxy from the proxy API and probe it with ``test_ip2``.

    The response body is parsed as JSON. It must carry a ``success`` field
    equal to ``True`` and a ``data`` list whose first entry has ``ip`` and
    ``port`` keys.

    Returns:
        tuple: ``(proxies, ip_tests)`` where ``proxies`` is the
        ``"ip:port"`` string and ``ip_tests`` is the value returned by
        ``self.test_ip2(proxies)``.

    Raises:
        SystemExit: when the request fails, the body cannot be parsed, or
            the API does not report success. The failure message is
            printed before exiting.
    """
    ag_url = '代理ip地址'
    proxies = None
    try:
        # A timeout keeps an unresponsive proxy API from blocking forever.
        ip_res = requests.get(ag_url, timeout=10).text.strip()
        payload = json.loads(ip_res)  # parse the body once and reuse it
        # Compare against True explicitly: a truthy non-boolean such as the
        # string "false" must not count as success.
        if payload['success'] == True:  # noqa: E712
            first = payload['data'][0]
            proxies = f'{first["ip"]}:{first["port"]}'
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network failure, invalid JSON, or an unexpected payload shape.
        proxies = None

    if proxies is None:
        print('获取代理ip失败')
        # Raised outside the try block so the exit is never swallowed by
        # the error handler above.
        raise SystemExit

    ip_tests = self.test_ip2(proxies)
    return proxies, ip_tests
def test_ip2(self, pro):
    """Probe a proxy by requesting the Baidu home page through it.

    Args:
        pro: Proxy address as an ``"ip:port"`` string.

    Returns:
        bool: ``False`` when the request through the proxy returned HTTP
        200 (proxy usable); ``True`` when it returned another status or
        the request failed. Note the inverted sense of the flag.
    """
    # The scheme in each value describes how to reach the proxy itself,
    # not the scheme of the target URL. An "https://" proxy URL makes
    # recent urllib3 versions attempt TLS to the proxy.
    # NOTE(review): assumes a plain-HTTP proxy, matching the "http://"
    # scheme test_ip passes to Chrome -- confirm with the proxy provider.
    proxy_url = f'http://{pro}'
    proxies = {'http': proxy_url, 'https': proxy_url}
    try:
        res = requests.get('https://www.baidu.com/', timeout=3, proxies=proxies)
    except requests.RequestException:
        print('代理IP无效')
        time.sleep(2)
        return True

    if res.status_code == 200:
        print('ip可用')
        return False

    print('代理ip无效')
    time.sleep(2)
    return True
def test_ip(self, pro):
    """Open Chrome through the proxy and run a Baidu search for ``ip``.

    The browser is stored in ``self.driver`` while the check runs and is
    always shut down afterwards, including when a step fails.

    Args:
        pro: Proxy address as an ``"ip:port"`` string.

    Returns:
        None. A failure is reported only by a printed message.
    """
    driver = None
    try:
        chromeOptions = webdriver.ChromeOptions()
        # Configure the proxy. There must be no spaces around "=":
        # "--proxy-server = http://202.20.16.82:10152" does not work.
        chromeOptions.add_argument(f"--proxy-server=http://{pro}")
        driver = self.driver = webdriver.Chrome(options=chromeOptions)
        driver.get('https://www.baidu.com/')
        time.sleep(2)
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.3.
        search_box = driver.find_element(By.ID, "kw")
        search_box.send_keys('ip')
        time.sleep(2)
        button = driver.find_element(By.ID, "su")
        button.click()
        time.sleep(5)
    except Exception:
        # Best-effort check: report and continue, but let
        # KeyboardInterrupt / SystemExit propagate.
        print('打开网页搜索检测代理ip失败')
    finally:
        # Only shut down a browser that this call actually started.
        if driver is not None:
            driver.quit()
def main(self):
    """Search Baidu for the keyword, open the target result and browse it.

    Works on the browser stored in ``self.driver``. It types the keyword
    into the element with id ``kw``, clicks the element with id ``su`` and
    maximises the window. It then calls ``get_information`` to locate and
    click the target link, switches to the last window handle, and runs
    ``mouse_move1`` followed by ``skip_web``.

    Returns:
        None. Any failure is reported by a printed message.
    """
    try:
        driver = self.driver
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.3.
        search_box = driver.find_element(By.ID, "kw")
        search_box.send_keys('鸡群')
        time.sleep(2)
        button = driver.find_element(By.ID, "su")
        button.click()
        time.sleep(2)
        # Maximise the window.
        driver.maximize_window()
        time.sleep(2)
        self.get_information(0, True)
        time.sleep(5)
        # Switch to the new page; [-1] is the last of all open windows.
        driver.switch_to.window(driver.window_handles[-1])
        self.mouse_move1()
        self.skip_web()
    except Exception:
        # Best-effort: report and continue, but do not swallow
        # KeyboardInterrupt / SystemExit.
        print('打开百度搜索失败')
def get_information(self, i, find_key):
    """Page through the search results until the target link is clicked.

    On each results page the method waits up to 10 seconds for elements
    with class ``c-showurl`` and clicks the first one whose text contains
    the target domain. If none matches, it clicks the last element with
    class ``n`` to move to another page and repeats.

    Args:
        i: Page counter value before the first page is examined; it is
            incremented once per page and used in the success message.
        find_key: The search loop runs only while this is truthy.

    Returns:
        None. Success and failure are reported by printed messages.
    """
    target_link = 'www.yangji.com'
    try:
        while find_key:
            i += 1
            # The wait returns the located elements, so they are used
            # directly instead of querying the page a second time.
            list_link = WebDriverWait(self.driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, 'c-showurl')))
            for link in list_link:
                link_content = link.get_attribute('textContent').strip()
                if target_link not in link_content:
                    continue
                try:
                    # Scroll until the target element is visible, then click.
                    # scrollIntoView() aligns the top of the element with the
                    # top of the window, so the element can still end up
                    # hidden and the native click can fail.
                    self.driver.execute_script("arguments[0].scrollIntoView();", link)
                    time.sleep(2)
                    link.click()
                except Exception:
                    # Fall back to a JavaScript click, which works on an
                    # element that is not visible.
                    self.driver.execute_script("arguments[0].click();", link)
                print('目标网页在目前关键词搜索第%d页' % i)
                return
            # NOTE(review): presumably the last ".n" element is the
            # "next page" button -- confirm against the last results page.
            n_button = self.driver.find_elements(By.CLASS_NAME, 'n')[-1]
            self.driver.execute_script("arguments[0].scrollIntoView();", n_button)
            time.sleep(2)
            n_button.click()
            time.sleep(2)
    except Exception:
        # Covers wait timeouts, a missing pager element (IndexError) and
        # WebDriver errors; KeyboardInterrupt / SystemExit still propagate.
        print('在百度搜索中寻找目标网页失败')
def mouse_move1(self):
    """Scroll the current page down in random steps, then back to the top.

    Scrolls down by a random 1000-3000 pixels every 5 seconds until the
    accumulated distance reaches 12000. It then issues smooth
    scroll-to-top commands while counting that distance back down in
    random 3000-5000 decrements, and finally pauses for 3 seconds.

    Returns:
        None. A failure is reported only by a printed message.
    """
    try:
        # Scroll down in several segments.
        scrolled = 0
        while scrolled < 12000:
            step = random.uniform(1000, 3000)
            # scrollBy(x, y): x is the horizontal and y the vertical offset;
            # a negative y (e.g. scrollBy(0,-5000)) would scroll up.
            self.driver.execute_script("window.scrollBy(0,%s)" % step)
            time.sleep(5)
            scrolled += step

        # Scroll back up to the top of the page.
        while scrolled > 0:
            back = random.uniform(3000, 5000)
            self.driver.execute_script('window.scrollTo({ top: 0, behavior: "smooth" })')
            scrolled -= back
            time.sleep(0.3)
        time.sleep(3)
    except Exception:
        # Best-effort: report and continue. KeyboardInterrupt / SystemExit
        # are not swallowed, so Ctrl-C works during the long sleeps.
        print('目标网页主页面滑动失败')
def mouse_move2(self):
    """Jump to the bottom of the current page, then scroll back to the top.

    Jumps to the bottom every 2 seconds until a random 1000-1500 counter
    reaches 3000. It then issues smooth scroll-to-top commands while
    counting back down in random 1000-1500 decrements, and finally pauses
    for 3 seconds.

    Returns:
        None. A failure is reported only by a printed message.
    """
    try:
        # Scroll down; each pass jumps straight to the bottom, and the
        # random counter only decides how many passes are made.
        budget = 0
        while budget < 3000:
            step = random.uniform(1000, 1500)
            self.driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
            time.sleep(2)
            budget += step

        # Scroll back up to the top of the page.
        while budget > 0:
            back = random.uniform(1000, 1500)
            self.driver.execute_script('window.scrollTo({ top: 0, behavior: "smooth" })')
            budget -= back
            time.sleep(0.3)
        time.sleep(3)
    except Exception:
        # Best-effort: report and continue. KeyboardInterrupt / SystemExit
        # are not swallowed, so Ctrl-C works during the sleeps.
        print('跳转页面滑动失败')
def skip_web(self):
try:
#随机选择点击网站内其他网页
option = random.randint(1,2)
if option == 1:
wait = WebDriverWait(self.driver,10).until(EC.presence_of_all_elements_located((By.ID,'destoon_word')))
top_items = self.driver.find_element_by_id('destoon_word').find_elements_by_tag_name('a')
x = random.randint(0, 9)
top_item = top_items[x]
top_item.click()
time.sleep(3)
self.mouse_move2()
return
else:
wait = WebDriverWait(self.driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'menu')))
top_menus = self.driver.find_element_by_class_name('menu').find_elements_by_tag_name('a')
y = random.randint(0, 8)
top_menu = top_menus[y]
top_menu