爬虫-上版优化

目的:减少点击量

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from datetime import date, timedelta
from re import search, findall, compile
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select


today = str(date.today())
# NOTE(review): despite its name this is the date two days ago (days=2).
# The offset is kept exactly as written -- confirm which day is intended.
yesterday = str(date.today() - timedelta(days=2))
province = "浙江"
query_day = yesterday

PORTAL_URL = "https://www.cuecp.cn/portal/index.jhtml"
PORTAL_TITLE = "中国联通合作方门户"
NOTICE_LINK_XPATH = '//*[@id="notic_content"]/div[2]/div[1]/span[2]/a'
SEARCH_BUTTON_XPATH = '//*[@id="content_list"]/div[1]/table/tbody/tr[3]/td[4]/img'
DETAIL_BODY_XPATH = '//*[@id="content_list"]/div[3]'

# Anchor text of each entry in the result list. "." does not match newlines,
# so this yields one title per line of the list HTML that contains an anchor.
ENTRY_TITLE_RE = compile(r'<a onclick.*>(.*)</a>')
# Entries whose title contains either word are skipped.
FAILED_RE = compile(r'不足|失败')
# Title of a detail page; mind the whitespace around the title text.
DETAIL_TITLE_RE = compile(r'<div class="content_title">\s*(.*)\s*</div>')


def successful_entry_indices(list_html):
    """Return the 1-based positions of result entries to be opened.

    Every anchor title found in *list_html* is numbered in order of
    appearance starting at 1; positions whose title matches ``FAILED_RE``
    are left out. The positions are later used as ``li[n]`` indices, which
    presumes one anchor per ``<li>`` -- verify against the page markup.

    Returns an empty list when no title is found.
    """
    titles = ENTRY_TITLE_RE.findall(list_html)
    return [
        position
        for position, title in enumerate(titles, start=1)
        if not FAILED_RE.search(title)
    ]


def extract_title(page_html):
    """Return the text of the first ``content_title`` div in *page_html*.

    Returns an empty string when the page has no such div, so that a page
    without a title still gets its content printed instead of aborting the
    whole run with an IndexError.
    """
    match = DETAIL_TITLE_RE.search(page_html)
    return match.group(1) if match else ''


def _open_search_results(driver):
    """Open the notice list from the first window and run the query.

    Clicks the notice link in the first window, switches to the most
    recently listed window, selects ``province``, fills both date fields
    with ``query_day`` and clicks the search button. The driver is left
    focused on the window showing the results.
    """
    driver.switch_to.window(driver.window_handles[0])
    driver.find_element(By.XPATH, NOTICE_LINK_XPATH).click()
    # Pause before reading window_handles so the newly opened window is listed.
    sleep(1)
    driver.switch_to.window(driver.window_handles[-1])

    Select(driver.find_element(By.NAME, "attribute4")).select_by_visible_text(province)
    for field in ("start_time_from", "start_time_to"):
        # The date inputs are read-only; drop the attribute so send_keys works.
        driver.execute_script("$('input[name=" + field + "]').removeAttr('readonly')")
        driver.find_element(By.NAME, field).send_keys(query_day)
    driver.find_element(By.XPATH, SEARCH_BUTTON_XPATH).click()


def _print_entry(driver, position):
    """Open result entry *position* (1-based), print it, close its window.

    Must be called while the driver is focused on a result list. Prints the
    title and the detail text separated by CRLF, then closes the window the
    driver ended up on.
    """
    entry_xpath = '//*[@id="title_list"]/ul/li[' + str(position) + ']/span[1]/div/a'
    driver.find_element(By.XPATH, entry_xpath).click()
    driver.switch_to.window(driver.window_handles[-1])

    page_html = driver.execute_script("return document.documentElement.outerHTML")
    title = extract_title(page_html)
    content = driver.find_element(By.XPATH, DETAIL_BODY_XPATH).text
    print(title+'\r\n'+content+'\r\n')
    driver.close()


def main():
    """Print every non-failed notice for ``province`` on ``query_day``.

    Raises:
        RuntimeError: if the loaded page title does not contain
            ``PORTAL_TITLE``.
    """
    # Headless mode was tried by the original author and marked as
    # misconfigured; kept here for reference only:
    # chrome_options = Options()
    # chrome_options.add_argument('--headless')
    # driver = webdriver.Chrome(chrome_options=chrome_options)
    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(5)
        driver.get(PORTAL_URL)
        # Explicit check instead of assert, which is stripped under "python -O".
        if PORTAL_TITLE not in driver.title:
            raise RuntimeError("unexpected page title: " + repr(driver.title))

        _open_search_results(driver)
        list_html = driver.find_element(By.ID, "title_list").get_attribute('innerHTML')
        positions = successful_entry_indices(list_html)

        if not positions:
            print(query_day + '无招标信息')
            return

        for count, position in enumerate(positions):
            if count:
                # The first entry reuses the result list opened above; every
                # later entry re-opens the list and repeats the query first.
                _open_search_results(driver)
            _print_entry(driver, position)
    finally:
        # quit() closes every remaining window and ends the driver session,
        # including when an error interrupts the run.
        driver.quit()


if __name__ == "__main__":
    main()






  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值