# Selenium + Python 爬虫 (web crawler)

import json
import time

import requests
import xlwt
from selenium import webdriver
from selenium.webdriver.common.by import By

'''国网新能源----post请求'''
class NewEnergy:
    """Plain record holding the 13 values of one scraped table row.

    Each constructor argument is stored unchanged on the instance under an
    attribute of the same name; no validation or type conversion happens.
    """

    # Attribute names, in constructor order (also the order used for display).
    _FIELDS = (
        'index', 'cusName', 'cusNo', 'net', 'proName', 'etype', 'size',
        'time', 'ptype', 'wtype', 'price', 'time2', 'index2',
    )

    # Takes 13 arguments.
    def __init__(self, index, cusName, cusNo, net, proName, etype, size,
                 time, ptype, wtype, price, time2, index2):
        supplied = (index, cusName, cusNo, net, proName, etype, size,
                    time, ptype, wtype, price, time2, index2)
        for attr_name, attr_value in zip(self._FIELDS, supplied):
            setattr(self, attr_name, attr_value)

    def desplyInfo(self):
        """Print all 13 fields on one line, separated by single spaces."""
        print(*(getattr(self, attr_name) for attr_name in self._FIELDS))

# Convert the table's rows into a list of NewEnergy objects (13 fields per row)
def tr_to_obj(rows):
    """Convert table rows into a list of ``NewEnergy`` records.

    Each row's ``.text`` is split on newlines; the first 13 pieces become
    the 13 ``NewEnergy`` fields, in order.

    A row with fewer than 13 pieces cannot be parsed and is represented by
    a placeholder record whose fields are all ``'jingwenjuana'``, so the
    returned list always has exactly one entry per input row.

    Note: the previous implementation had its ``try``/``else`` branches
    inverted -- successfully parsed rows were overwritten by the
    placeholder, and unparseable rows re-appended the previous row's
    object (or raised ``NameError`` when the very first row was short).

    Args:
        rows: iterable of objects exposing a ``text`` string attribute
            (e.g. the ``<tr>`` elements returned by Selenium).

    Returns:
        list of ``NewEnergy`` objects, one per row, in input order.
    """
    field_count = 13
    placeholder = 'jingwenjuana'

    a_page_list = []
    for row in rows:
        cells = row.text.split('\n')
        if len(cells) >= field_count:
            # Extra trailing pieces are ignored, as before.
            new_energy = NewEnergy(*cells[:field_count])
        else:
            # Too few cells (e.g. a header or empty row): keep a marker
            # record instead of silently reusing stale data.
            new_energy = NewEnergy(*([placeholder] * field_count))
        a_page_list.append(new_energy)
    return a_page_list

# Scrape the project list page by page with headless Chrome.
#
# Fixes over the earlier version of this script:
#   * parsed rows are accumulated in `all_records` (the result of
#     tr_to_obj() used to be thrown away);
#   * the browser is always shut down via try/finally, so a failure midway
#     no longer leaves a headless Chrome process running;
#   * `options=` and `find_element(By.XPATH, ...)` replace the deprecated
#     `chrome_options=` keyword and `find_element_by_*` helpers.

# 1. Start the browser and open the page.
# Use headless (no visible window) browser mode.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=chrome_options)
mainUrl = "http://sgnec.esgcc.com.cn/atlas/projectListQuery"

TOTAL_PAGES = 138   # number of result pages to walk through
all_records = []    # NewEnergy objects collected from every page

try:
    driver.get(mainUrl)
    driver.maximize_window()

    # 2. Select the "非自然人分布式" tab (second <li>). Per the original
    #    author's note the <li> is hidden and cannot be clicked normally,
    #    so the click is dispatched through JavaScript instead.
    li_element = driver.find_element(By.XPATH, '//*[@id="app"]/div[1]/div[2]/div/div[2]/ul/li[2]')
    driver.execute_script("arguments[0].click();", li_element)

    # 3. Choose 100 rows per page: open the page-size dropdown, then pick
    #    its sixth entry. The sleeps give the page time to re-render.
    page_element = driver.find_element(By.XPATH, '//*[@id="app"]/div[1]/div[2]/div/div[3]/div[2]/div[2]/div[3]/div[2]/div/span[2]/div/div/input')
    driver.execute_script("arguments[0].click();", page_element)
    time.sleep(3)
    page_num_element = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[1]/ul/li[6]')
    driver.execute_script("arguments[0].click();", page_num_element)
    time.sleep(3)

    # 4. Parse the text of the current table, then click "next page".
    for page in range(TOTAL_PAGES):
        table_element = driver.find_element(By.XPATH, '//*[@id="app"]/div[1]/div[2]/div/div[3]/div[2]/div[2]/div[3]/div[1]/div[3]/table')
        rows = table_element.find_elements(By.TAG_NAME, 'tr')
        all_records.extend(tr_to_obj(rows))
        time.sleep(3)
        next_page_element = driver.find_element(By.XPATH, '//*[@id="app"]/div[1]/div[2]/div/div[3]/div[2]/div[2]/div[3]/div[2]/div/button[2]/i')
        driver.execute_script("arguments[0].click();", next_page_element)
        time.sleep(3)
finally:
    # Always release the browser process, even if a lookup above failed.
    driver.quit()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值