Python项目实例——selenium自动化项目升级版——定时操作

最新推荐文章于 2023-01-03 17:47:42 发布

Fo*(Bi)

最新推荐文章于 2023-01-03 17:47:42 发布

阅读量282

点赞数

分类专栏： Python使用实例　文章标签： python、selenium、多线程

本文链接：https://blog.csdn.net/weixin_48615832/article/details/107567224

版权

低端版：Python的selenium自动化项目实例

升级版加入指定时间段操作功能，例如让程序只在上午 8:30 到 11:30、下午 14:00 到 17:00、晚上 19:00 到 22:00 这几个时间段内运行。

项目1.0：

循环_file：

文件命名：循环_file

from selenium import webdriver
import time
import random
import requests
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
import json


class Baidusearch():
    def __init__(self):
        pass

    def agent(self):
        """Fetch one proxy from the proxy API and probe it.

        Returns:
            tuple: ``(proxies, ip_tests)``. ``proxies`` is an ``"ip:port"``
            string built from the first entry of the API's ``data`` list;
            ``ip_tests`` is whatever ``self.test_ip2(proxies)`` returns.

        Raises:
            SystemExit: if the API request fails, the reply cannot be parsed,
                or the reply's ``success`` field is not true. The failure
                message is printed before exiting.
        """
        ag_url = '代理ip地址'
        proxies = None
        try:
            # A timeout keeps an unresponsive proxy API from hanging the script.
            ip_res = requests.get(ag_url, timeout=10).text.strip()
            payload = json.loads(ip_res)  # parse once, reuse below
            # `== True` (rather than truthiness) keeps the original acceptance
            # rule for the API's `success` field.
            if payload['success'] == True:
                resp_json = payload['data'][0]
                proxies = f'{resp_json["ip"]}:{resp_json["port"]}'
        except Exception:
            # Network errors, malformed JSON and missing keys all count as
            # "no proxy available"; Ctrl-C is deliberately not intercepted.
            proxies = None

        if proxies is None:
            print('获取代理ip失败')
            # Raise SystemExit directly: `quit` is a helper installed by the
            # `site` module and is not guaranteed to exist.
            raise SystemExit

        ip_tests = self.test_ip2(proxies)
        return proxies, ip_tests

    def test_ip2(self, pro):
        """Probe a proxy by requesting the Baidu home page through it.

        Args:
            pro: proxy address as an ``"ip:port"`` string.

        Returns:
            bool: ``False`` when the request returned HTTP 200 (proxy usable),
            ``True`` when it returned another status or raised. Note the
            inverted sense: ``True`` means the proxy is *invalid*.
        """
        # NOTE(review): the 'https' entry uses an https:// proxy URL, i.e. it
        # asks for a TLS connection to the proxy itself -- confirm the proxy
        # provider actually supports that.
        proxies = {'http': f'http://{pro}', 'https': f'https://{pro}'}
        try:
            res = requests.get('https://www.baidu.com/', timeout=3, proxies=proxies)
        except Exception:
            # Any request failure means the proxy is unusable. Catching
            # Exception (not a bare except) lets Ctrl-C still stop the script.
            print('代理IP无效')
            time.sleep(2)
            return True

        if res.status_code == 200:
            print('ip可用')
            return False

        print('代理ip无效')
        time.sleep(2)
        return True

    def test_ip(self, pro):
        """Check a proxy by running a real Baidu search in Chrome through it.

        Opens Chrome with ``pro`` as its proxy server, loads Baidu, searches
        for ``ip`` and closes the window again. The driver is also stored on
        ``self.driver``.

        Args:
            pro: proxy address as an ``"ip:port"`` string.

        Returns:
            None. Failures are reported by printing a message.
        """
        driver = None
        try:
            chromeOptions = webdriver.ChromeOptions()
            # Set the proxy. There must be no spaces around '=':
            # "--proxy-server = http://202.20.16.82:10152" does not work.
            chromeOptions.add_argument(f"--proxy-server=http://{pro}")
            driver = webdriver.Chrome(options=chromeOptions)
            self.driver = driver
            driver.get('https://www.baidu.com/')
            time.sleep(2)
            # find_element(By.ID, ...) replaces find_element_by_id, which was
            # removed in Selenium 4.3; this form works on Selenium 3 and 4.
            searchBox = driver.find_element(By.ID, "kw")
            searchBox.send_keys('ip')
            time.sleep(2)
            button = driver.find_element(By.ID, "su")
            button.click()
            time.sleep(5)
        except Exception:
            print('打开网页搜索检测代理ip失败')
        finally:
            # Close the window even when a step above failed, so a broken
            # proxy does not leave a browser window open.
            if driver is not None:
                try:
                    driver.close()
                except Exception:
                    print('打开网页搜索检测代理ip失败')

    def main(self):
        """Search Baidu for the keyword, open the target site and browse it.

        Types the keyword into the Baidu search box, submits it, maximizes the
        window, lets ``get_information`` click the target result, switches to
        the most recently opened window, then scrolls it (``mouse_move1``) and
        visits a random sub-page (``skip_web``).

        Assumes ``self.driver`` already shows the Baidu home page; the code
        that navigates there is not part of this method -- TODO confirm.

        Returns:
            None. Any failure is reported by printing a message.
        """
        try:
            # find_element(By.ID, ...) replaces find_element_by_id, which was
            # removed in Selenium 4.3; this form works on Selenium 3 and 4.
            searchBox = self.driver.find_element(By.ID, "kw")
            searchBox.send_keys('鸡群')
            time.sleep(2)
            button = self.driver.find_element(By.ID, "su")
            button.click()
            time.sleep(2)
            # Maximize the window.
            self.driver.maximize_window()
            time.sleep(2)
            self.get_information(0, True)
            time.sleep(5)
            # Switch to the new page; [-1] is the last of all open windows.
            self.driver.switch_to.window(self.driver.window_handles[-1])
            self.mouse_move1()
            self.skip_web()
        except Exception:
            # Exception (not a bare except) so Ctrl-C can still stop the run.
            print('打开百度搜索失败')

    def get_information(self, i, find_key, target_link='www.yangji.com'):
        """Page through the search results until the target site is found.

        On each result page, waits for the ``c-showurl`` elements, and clicks
        the first one whose text contains ``target_link``. If none matches,
        clicks the last element with class ``n`` to move on and repeats.

        Args:
            i: number of result pages already visited; incremented before each
                page is scanned and used in the "found on page" message.
            find_key: the search loop runs only while this is truthy.
            target_link: substring to look for in each result's displayed URL.

        Returns:
            None. Success and failure are both reported by printing.
        """
        try:
            while find_key:
                i += 1
                # The wait returns the located elements, so there is no need
                # to query the page a second time.
                list_link = WebDriverWait(self.driver, 10).until(
                    EC.presence_of_all_elements_located((By.CLASS_NAME, 'c-showurl')))
                for link in list_link:
                    link_content = link.get_attribute('textContent').strip()
                    if target_link not in link_content:
                        continue
                    try:
                        # Scroll until the target element is visible.
                        self.driver.execute_script("arguments[0].scrollIntoView();", link)
                        time.sleep(2)
                        link.click()
                    except Exception:
                        # scrollIntoView aligns the element's top with the top
                        # of the window, which can leave it not visible and
                        # make the normal click fail. Clicking through
                        # JavaScript works without the element being visible.
                        self.driver.execute_script("arguments[0].click();", link)
                    print('目标网页在目前关键词搜索第%d页' % i)
                    return
                # find_elements(By.CLASS_NAME, ...) replaces the
                # find_elements_by_class_name helper removed in Selenium 4.3.
                n_button = self.driver.find_elements(By.CLASS_NAME, 'n')[-1]
                self.driver.execute_script("arguments[0].scrollIntoView();", n_button)
                time.sleep(2)
                n_button.click()
                time.sleep(2)
        except Exception:
            # Covers wait timeouts, a missing pagination button (IndexError)
            # and driver errors; Ctrl-C is deliberately not intercepted.
            print('在百度搜索中寻找目标网页失败')

    def mouse_move1(self):
        """Scroll the current page down in random steps, then back to the top.

        Scrolls down by a random 1000-3000 px every 5 seconds until the
        requested total reaches 12000 px. It then repeatedly issues a smooth
        scroll to the top, 0.3 seconds apart, while counting the total back
        down in random 3000-5000 steps.

        Returns:
            None. A failure is reported by printing a message.
        """
        try:
            # Scroll down in several steps.
            scrolled = 0
            while scrolled < 12000:
                step = random.uniform(1000, 3000)
                # scrollBy(x, y): horizontal and vertical offsets; a negative
                # y would scroll upwards.
                self.driver.execute_script("window.scrollBy(0,%s)" % step)
                time.sleep(5)
                scrolled += step

            # Scroll back up to the top. Each pass re-issues the same smooth
            # scroll; the counter only decides how many passes are made.
            while scrolled > 0:
                back = random.uniform(3000, 5000)
                self.driver.execute_script('window.scrollTo({ top: 0, behavior: "smooth" })')
                scrolled -= back
                time.sleep(0.3)
            time.sleep(3)
        except Exception:
            # Exception (not a bare except) so Ctrl-C during the long sleeps
            # stops the script instead of being reported as a scroll failure.
            print('目标网页主页面滑动失败')

    def mouse_move2(self):
        """Jump to the bottom of the current page a few times, then to the top.

        Scrolls straight to the bottom every 2 seconds until a counter that
        grows by a random 1000-1500 per pass reaches 3000. It then repeatedly
        issues a smooth scroll to the top, 0.3 seconds apart, while counting
        back down in random 1000-1500 steps.

        Returns:
            None. A failure is reported by printing a message.
        """
        try:
            # Scroll down: every pass goes straight to the bottom; the random
            # increment only decides how many passes are made.
            budget = 0
            while budget < 3000:
                step = random.uniform(1000, 1500)
                self.driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
                time.sleep(2)
                budget += step

            # Scroll back up to the top.
            while budget > 0:
                back = random.uniform(1000, 1500)
                self.driver.execute_script('window.scrollTo({ top: 0, behavior: "smooth" })')
                budget -= back
                time.sleep(0.3)
            time.sleep(3)
        except Exception:
            # Exception (not a bare except) so Ctrl-C during the sleeps stops
            # the script instead of being reported as a scroll failure.
            print('跳转页面滑动失败')

    def skip_web(self):
        try:
            #随机选择点击网站内其他网页
            option = random.randint(1,2)
            if option == 1:
                wait = WebDriverWait(self.driver,10).until(EC.presence_of_all_elements_located((By.ID,'destoon_word')))
                top_items = self.driver.find_element_by_id('destoon_word').find_elements_by_tag_name('a')
                x = random.randint(0, 9)
                top_item = top_items[x]
                top_item.click()
                time.sleep(3)
                self.mouse_move2()
                return
            else:
                wait = WebDriverWait(self.driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'menu')))
                top_menus = self.driver.find_element_by_class_name('menu').find_elements_by_tag_name('a')
                y = random.randint(0, 8)
                top_menu = top_menus[y]
                top_menu

最低0.47元/天解锁文章

Fo*(Bi)

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python项目实例——selenium自动化项目升级版——定时操作

循环_file：from selenium import webdriverimport timeimport randomimport requestsfrom selenium.webdriver.support import expected_conditions as ECfrom selenium.webdriver.support.wait import WebDriverWaitfrom selenium.webdriver.common.by import Byimport
复制链接

扫一扫