import random
import traceback
from bs4 import BeautifulSoup
import requests
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#测试
def _parse_table_rows(page_source):
    """Return every ``tr.el-table__row`` element found in *page_source*."""
    soup = BeautifulSoup(page_source, features="html.parser")
    return soup.find_all('tr', {'class': 'el-table__row'})


def _cell_text(row, column, empty=''):
    """Return the text of the first ``div.cell`` in column *column* of *row*.

    *empty* is returned when the ``td``/``div`` is missing or holds no text.
    ``get_text()`` is used instead of stripping the ``repr`` of ``.contents``
    so that quotes, escape sequences and ``[]`` never leak into the sheet.
    """
    td = row.find('td', {'class': 'el-table_1_column_' + str(column)})
    cell = td.find('div', {'class': 'cell'}) if td is not None else None
    text = cell.get_text() if cell is not None else ''
    return text or empty


def _row_values(row):
    """Return the ten column texts of *row*; empty columns 9 and 10 become '无'."""
    return [_cell_text(row, column, '无' if column >= 9 else '')
            for column in range(1, 11)]


def _write_header(sheet):
    """Write the fixed header ('列1' .. '列10') into row 0 of *sheet*."""
    for index in range(10):
        sheet.write(0, index, '列' + str(index + 1))


def test1(url='url', savepath='C:/Users/小陈的电脑/Desktop/test.xls', max_pages=1987):
    """Scrape a paginated ``el-table`` with Edge and store it in an .xls workbook.

    The page at *url* is opened, every ``tr.el-table__row`` of the current
    page is written (ten columns per row, also echoed with ``print``), then
    the ``btn-next`` button is clicked and the process repeats.  A new sheet
    is started whenever more than 30000 data rows have been written to the
    current one.

    Args:
        url: Address passed to ``driver.get``.
        savepath: Destination of the workbook.  xlwt writes the legacy
            ``.xls`` format, so the default uses that extension.
        max_pages: Upper bound on the number of pages read (first page
            included).

    Raises:
        RuntimeError: if the first page never shows any table row.

    The workbook is saved even when scraping fails part-way, and the browser
    is always closed.
    """
    import xlwt
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    rows_per_sheet = 30000
    first_page_polls = 30   # one-second polls while the first page has no rows
    single_row_polls = 10   # three-second polls while a page shows exactly one row

    broswer = webdriver.Edge()
    try:
        broswer.get(url)
        wait = WebDriverWait(broswer, 30)
        table_present = EC.presence_of_element_located((By.CLASS_NAME, 'el-table'))

        def read_rows(settle_seconds):
            # Wait for the table container, give it time to fill, then parse.
            wait.until(table_present)
            time.sleep(settle_seconds)
            return _parse_table_rows(broswer.page_source)

        # The container can exist before its rows do: poll (with a pause, and
        # a bound) until at least one row is present.
        rows = read_rows(0)
        for _ in range(first_page_polls):
            if rows:
                break
            rows = read_rows(1)
        if not rows:
            raise RuntimeError('no el-table__row found on the first page')

        # Create the workbook.
        book = xlwt.Workbook(encoding='utf-8', style_compression=0)
        sheet = book.add_sheet('数据1', cell_overwrite_ok=True)
        _write_header(sheet)
        line = 1
        sheetIndex = 2
        page = 1
        try:
            while True:
                # Write every row actually present on the page.
                for row in rows:
                    values = _row_values(row)
                    for column, value in enumerate(values):
                        sheet.write(line, column, value)
                    print(" ".join(values))
                    line += 1

                if page >= max_pages:
                    break

                # Click the next-page button; stop when it is missing or
                # disabled, otherwise the same page would be scraped again.
                buttons = broswer.find_elements(By.CLASS_NAME, "btn-next")
                if not buttons or not buttons[0].is_enabled():
                    break
                buttons[0].click()
                page += 1

                # Random pause so the site is not hammered.
                time.sleep(random.randint(1, 3))
                rows = read_rows(3)
                # NOTE(review): the original re-read the page for as long as
                # exactly one row was present (presumably a transitional
                # state - confirm).  That check is kept but bounded so a
                # genuine one-row page cannot hang the run.
                for _ in range(single_row_polls):
                    if len(rows) != 1:
                        break
                    rows = read_rows(3)

                # Start a fresh sheet once the current one is full.
                if line > rows_per_sheet:
                    sheet = book.add_sheet('数据' + str(sheetIndex), cell_overwrite_ok=True)
                    sheetIndex += 1
                    line = 1
                    _write_header(sheet)
        finally:
            # Keep whatever was collected, even if a later page failed.
            book.save(savepath)
    finally:
        broswer.quit()
# Used for testing: run the scraper.
test1()

# To install a missing module (replace "xlwt" with the library name), run pip
# through the interpreter that executes this script.  pip does not support
# being called as a library, and ``pip.main`` is not available in current
# releases:
#
#     import subprocess
#     import sys
#     subprocess.check_call([sys.executable, "-m", "pip", "install", "xlwt"])