import requests
import json
from selenium import webdriver
import time
import xlwt
'''State Grid (SGCC) new-energy project list scraper — the list is served via a POST request.'''
class NewEnergy:
    """One row of the State Grid new-energy project table (13 columns).

    Field values are stored verbatim as scraped; no parsing or validation
    is performed here.
    """

    # 13 positional column values, in table order.
    def __init__(self, index, cusName, cusNo, net, proName, etype, size, time,
                 ptype, wtype, price, time2, index2):
        self.index = index      # row number
        self.cusName = cusName  # customer name
        self.cusNo = cusNo      # customer number
        self.net = net
        self.proName = proName  # project name
        self.etype = etype
        self.size = size
        self.time = time
        self.ptype = ptype
        self.wtype = wtype
        self.price = price
        self.time2 = time2
        self.index2 = index2

    def desplyInfo(self):
        """Print all 13 fields on a single line, space-separated."""
        fields = (self.index, self.cusName, self.cusNo, self.net,
                  self.proName, self.etype, self.size, self.time,
                  self.ptype, self.wtype, self.price, self.time2, self.index2)
        print(*fields)
# 将table中的元素保存到List,13lines
def tr_to_obj(rows):
    """Convert selenium <tr> elements into a list of NewEnergy records.

    Each row's visible text is split on newlines into 13 cell values.
    Rows that cannot be parsed into 13 cells (e.g. header or malformed
    rows) fall back to a placeholder record, so the result always has
    one entry per input row.

    :param rows: iterable of selenium WebElements (table rows)
    :return: list of NewEnergy objects, one per input row
    """
    placeholder = ['jingwenjuana'] * 13
    a_page_list = []
    for row in rows:
        cells = row.text.split('\n')
        try:
            # BUG FIX: the original put the parsed object in `try` but then
            # unconditionally overwrote it in `else` — which runs when NO
            # exception occurs — so every good row became the placeholder,
            # and a failing first row left `new_energy` unbound. The
            # placeholder is now built only when parsing fails.
            new_energy = NewEnergy(*cells[:13])
        except Exception:
            new_energy = NewEnergy(*placeholder)
        a_page_list.append(new_energy)
    return a_page_list
# 1. Launch the browser in headless mode and open the page.
# NOTE(review): find_element_by_* / chrome_options= are the Selenium 3 API;
# both were removed in Selenium 4 (use find_element(By.XPATH, ...) and
# options= if the dependency is upgraded).
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(chrome_options=chrome_options)
mainUrl = "http://sgnec.esgcc.com.cn/atlas/projectListQuery"
driver.get(mainUrl)
driver.maximize_window()

# Accumulates every parsed record across all pages.
# BUG FIX: the original called tr_to_obj() and threw its return value away,
# so the whole scrape collected nothing.
all_records = []

try:
    # 2. Click the "non-natural-person distributed" tab. The <li> is hidden,
    # so a plain .click() fails; dispatch the click through JavaScript.
    li_element = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div/div[2]/ul/li[2]')
    driver.execute_script("arguments[0].click();", li_element)

    # 3. Switch the page-size selector to 100 rows per page.
    page_element = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div/div[3]/div[2]/div[2]/div[3]/div[2]/div/span[2]/div/div/input')
    driver.execute_script("arguments[0].click();", page_element)
    time.sleep(3)  # crude wait for the dropdown to render
    page_num_element = driver.find_element_by_xpath('/html/body/div[2]/div[1]/div[1]/ul/li[6]')
    driver.execute_script("arguments[0].click();", page_num_element)
    time.sleep(3)

    # 4. Parse the table on each page, then click "next page".
    # NOTE(review): 138 is the hard-coded page count observed at scrape time —
    # confirm it still matches the site before rerunning.
    for page in range(138):
        table_element = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div/div[3]/div[2]/div[2]/div[3]/div[1]/div[3]/table')
        rows = table_element.find_elements_by_tag_name('tr')
        all_records.extend(tr_to_obj(rows))
        time.sleep(3)
        next_page_element = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div/div[3]/div[2]/div[2]/div[3]/div[2]/div/button[2]/i')
        driver.execute_script("arguments[0].click();", next_page_element)
        time.sleep(3)
finally:
    # BUG FIX: the browser was never closed, leaking the Chrome process.
    driver.quit()
# selenium + Python web scraper