一個更好用的 crawler,完全模仿人為操作。
也可以用於自動化網頁測試。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import xlsxwriter
# Scrape result pages 1-9 with a real Firefox session and copy the table found
# on each page into the "contract" sheet of abc.xlsx.
#
# Per table row: the text of the first six "col" cells is written as-is; for
# the seventh cell the href of its first <a> element (if any) is written
# instead of the text. Any further cells are ignored.
workbook = xlsxwriter.Workbook('abc.xlsx')
worksheet = workbook.add_worksheet('contract')
driver = webdriver.Firefox()
try:
    # Running 0-based output row. Using a counter (rather than deriving the
    # row from a fixed "10 rows per page") keeps the sheet contiguous and
    # avoids overwriting rows when a page holds more or fewer rows.
    out_row = 0
    for page in range(1, 10):
        driver.get("https://www.abc.com=" + str(page))
        elem = driver.find_element(By.CLASS_NAME, "result")
        rows = elem.find_elements(By.CLASS_NAME, "row")
        for row_idx, row in enumerate(rows):
            # The first row is kept only for page 1 -- presumably it is a
            # header row repeated on every page (confirm against the site).
            if page > 1 and row_idx == 0:
                continue
            cols = row.find_elements(By.CLASS_NAME, "col")
            for col_idx, col in enumerate(cols):
                if col_idx < 6:
                    worksheet.write(out_row, col_idx, col.text)
                elif col_idx == 6:
                    print(col.text)
                    all_a = col.find_elements(By.TAG_NAME, "a")
                    if all_a:
                        # Store the link target rather than the cell text.
                        worksheet.write(
                            out_row, col_idx, all_a[0].get_attribute("href")
                        )
            out_row += 1
finally:
    # Always flush whatever was collected to disk and end the browser
    # session, even if a page failed to load or an element was missing.
    try:
        workbook.close()
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window and can leave the driver process running.
        driver.quit()