```python
import re
import time
import urllib.request

import numpy
import pandas as pds
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Shared Chrome WebDriver session, created at import time and referenced as a
# module-level global by the helper functions in this file.
browser = webdriver.Chrome()
def enter(url, element, timeout=2):
    """Open *url* in the shared browser and wait for *element* to appear.

    Args:
        url: Address to load with the module-level ``browser``.
        element: XPath expression of the element whose presence is awaited.
        timeout: Seconds to wait for the element. Defaults to 2, the value
            that used to be hard-coded.

    Returns:
        None. When the wait times out, a message naming the URL and the
        XPath is printed instead of raising.
    """
    wait = WebDriverWait(browser, timeout)
    try:
        browser.get(url)
        wait.until(EC.presence_of_element_located((By.XPATH, element)))
    except TimeoutException:
        # Best effort: report the miss and let the caller carry on.
        print("在" + url + '\n' + '未定位到' + element)
def get_detail(element):
    """Return the text of the first element matching an XPath.

    Args:
        element: XPath expression evaluated against the shared ``browser``.

    Returns:
        The element's ``text``, or the placeholder ``"无"`` when locating
        the element or reading its text fails with a WebDriver error.
    """
    try:
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # newer Selenium releases no longer provide.
        return browser.find_element(By.XPATH, element).text
    except WebDriverException:
        # Only WebDriver failures (missing element, stale reference, ...) are
        # treated as "no value"; KeyboardInterrupt and programming errors
        # are no longer swallowed.
        return "无"
def get_element_attribute(element, attribute):
    """Return an attribute value of the first element matching an XPath.

    Args:
        element: XPath expression evaluated against the shared ``browser``.
        attribute: Name of the attribute to read.

    Returns:
        Whatever ``WebElement.get_attribute(attribute)`` returns.

    Raises:
        NoSuchElementException: Propagated from Selenium when nothing
            matches *element*.
    """
    # find_element(By.XPATH, ...) replaces the removed find_element_by_xpath.
    target = browser.find_element(By.XPATH, element)
    return target.get_attribute(attribute)
def click_element(element):
    """Click the first element matching an XPath.

    Args:
        element: XPath expression evaluated against the shared ``browser``.

    Raises:
        NoSuchElementException: Propagated from Selenium when nothing
            matches *element*.
    """
    # find_element(By.XPATH, ...) replaces the removed find_element_by_xpath;
    # click() returns None, so its result is not kept.
    browser.find_element(By.XPATH, element).click()
def send_word(element, text):
    """Type *text* into the first element matching an XPath, then press Enter.

    Args:
        element: XPath expression evaluated against the shared ``browser``.
        text: Keys to send before the Enter key.

    Raises:
        NoSuchElementException: Propagated from Selenium when nothing
            matches *element*.
    """
    # find_element(By.XPATH, ...) replaces the removed find_element_by_xpath.
    field = browser.find_element(By.XPATH, element)
    field.send_keys(text)
    field.send_keys(Keys.ENTER)
def clear_word(element):
    """Clear the first element matching an XPath.

    Args:
        element: XPath expression evaluated against the shared ``browser``.

    Raises:
        NoSuchElementException: Propagated from Selenium when nothing
            matches *element*.
    """
    # find_element(By.XPATH, ...) replaces the removed find_element_by_xpath;
    # clear() returns None, so its result is not kept.
    browser.find_element(By.XPATH, element).clear()
def get_ele_cnt(element):
    """Count the elements matching an XPath.

    Args:
        element: XPath expression evaluated against the shared ``browser``.

    Returns:
        The number of matching elements (0 when there are none).
    """
    # find_elements(By.XPATH, ...) replaces the removed find_elements_by_xpath.
    return len(browser.find_elements(By.XPATH, element))
def get_each_class(element1, element2):
    """Read one element's text plus the text of the last match of another XPath.

    Args:
        element1: XPath passed to ``get_detail``.
        element2: XPath whose matches are collected; only the last one is used.

    Returns:
        A tuple ``(m, n)`` where ``m`` is ``get_detail(element1)`` and ``n``
        is the text of the last element matching *element2*. When nothing
        matches *element2*, ``n`` is ``"0"`` instead of raising IndexError.
    """
    m = get_detail(element1)
    # find_elements(By.XPATH, ...) replaces the removed find_elements_by_xpath.
    matches = browser.find_elements(By.XPATH, element2)
    # The caller in this file converts n with int() to bound its page loop,
    # so "0" makes it iterate zero times when no match exists.
    n = matches[-1].text if matches else "0"
    return m, n
def get_class_detail(element):
    """Collect the text fields of one card located by an XPath prefix.

    Args:
        element: XPath of the card; each field's relative XPath is appended
            to it and read through ``get_detail``.

    Returns:
        A 9-tuple ``(classname, teaching_mode, dtbegindate, dtdate, address,
        teacher_main, teacher_vice, teacher_tag, class_price)``. Each entry
        is whatever ``get_detail`` returns for that field.
    """
    def text_at(suffix):
        # Read one field relative to the card's XPath.
        return get_detail(element + suffix)

    header = '//div[@class="item_header"]'
    info = '//div[@class="item_info"]'
    teacher = '//div[@class="teacher"]'

    classname = text_at(header + '/div[1]')
    teaching_mode = text_at(header + '/div[2]')
    dtbegindate = text_at(info + '/span[1]')
    dtdate = text_at(info + '/span[2]')
    address = text_at(info + '/span[3]')
    teacher_main = text_at(teacher + '/div[@class="teacher_main"]')
    teacher_vice = text_at(teacher + '/div[@class="teacher_vice"]')

    # Cards whose mode reads "在线" carry the tag under a different class name.
    tag_class = "remain_tag" if teaching_mode == "在线" else "teacher_tag"
    teacher_tag = text_at(teacher + '/div[@class="' + tag_class + '"]')

    class_price = text_at('//div[@class="item_footer"]/div[@class="left"]')
    return (
        classname,
        teaching_mode,
        dtbegindate,
        dtdate,
        address,
        teacher_main,
        teacher_vice,
        teacher_tag,
        class_price,
    )
# Department index -> (department name, grade names ordered by 1-based grade index).
_SDEPT_GRADES = {
    1: ("幼儿部", ("托班", "小班", "中班", "大班")),
    2: ("小学部", ("一年级", "二年级", "三年级", "四年级", "五年级", "六年级", "小学组")),
    3: ("初中部", ("初一", "初二", "中考", "初中组")),
}
# Every department index other than 1-3 maps to this entry.
_SDEPT_DEFAULT = ("高中部", ("高一", "高二", "高考", "高中组"))


def sdept_grade(i, j):
    """Translate department and grade indices into their display names.

    Args:
        i: Department index. 1, 2 and 3 select "幼儿部", "小学部" and
            "初中部"; any other value selects "高中部".
        j: 1-based grade index within the department.

    Returns:
        A tuple ``(sdept, classtype)`` of department name and grade name.

    Raises:
        ValueError: If *j* is not a valid grade index for the department
            (an unknown index used to fail with UnboundLocalError).
    """
    sdept, grades = _SDEPT_GRADES.get(i, _SDEPT_DEFAULT)
    if j not in range(1, len(grades) + 1):
        raise ValueError(
            "grade index %r is out of range for %s (expected 1-%d)"
            % (j, sdept, len(grades))
        )
    return (sdept, grades[int(j) - 1])
def write_csv(i, school):
    """Append a single ``[i, school]`` row to the hard-coded CSV file.

    Args:
        i: Value written to the first column.
        school: Value written to the second column.
    """
    row = [i, school]
    frame = pds.DataFrame([row])
    # Append mode, without index or header, so repeated calls add one line each.
    frame.to_csv(
        'C:/Users/Administrator/Desktop/一批文分数线.csv',
        sep=',',
        mode='a',
        index=False,
        header=False,
    )
def main():
    """Scrape the class cards of every department/grade and append them to a CSV.

    For each department index 1-4 and each grade index within it, the grade
    is selected in the page's grade picker, the result pages are walked, and
    one CSV row per card is appended to the hard-coded output file. Each row
    holds the scrape timestamp, department, grade, the pagination-total text
    and the nine fields returned by ``get_class_detail``.

    The shared ``browser`` is always shut down when the function exits,
    including when an error interrupts the scrape.
    """
    url = 'https://www.speiyou.com/shanxi_xian/list'
    output_path = 'C:/Users/Administrator/Desktop/学而思finnal.csv'
    picker_xpath = '//*[@id="__layout"]/div/header/div[3]/div/span/div[2]/span'
    card_xpath = '//*[@id="__layout"]/div/div/section/div[2]/div[@class="card_list"]/div'
    # NOTE(review): button[2] is presumably the pager's "next" button - confirm
    # against the live page.
    next_xpath = '//*[@id="__layout"]/div/div/section/div[3]/div/button[2]/i'

    try:
        enter(url, '//*[@id="test"]/div/ul/li[1]/a')
        click_element('//div[@class="modal_btn"]')
        for i in range(1, 5):
            # Department 2 has seven grade entries in sdept_grade; the
            # other departments have four.
            jj = 8 if i == 2 else 5
            for j in range(1, jj):
                sdept, grade = sdept_grade(i, j)
                click_element(picker_xpath)
                click_element(
                    '//div[@class="grade_container"]//li[' + str(i)
                    + ']/div/span[' + str(j) + ']'
                )
                time.sleep(3)
                m, n = get_each_class(
                    '//span[@class="el-pagination__total"]',
                    '//ul[@class="el-pager"]//li',
                )
                print(m, n)
                total_pages = int(n)
                for page in range(1, total_pages + 1):
                    print(page)
                    # Scrape the page that is currently displayed first, so
                    # page 1 is not skipped.
                    classcnt = get_ele_cnt(card_xpath)
                    rows = []
                    for k in range(1, classcnt + 1):
                        details = get_class_detail(card_xpath + '[' + str(k) + ']')
                        s_time = time.strftime("%Y-%m-%d %H:%M:%S")
                        rows.append([s_time, sdept, grade, m] + list(details))
                    if rows:
                        # One append per page instead of one per card.
                        pds.DataFrame(rows).to_csv(
                            output_path,
                            sep=',',
                            mode='a',
                            index=False,
                            header=False,
                            encoding='utf_8_sig',
                        )
                    if page < total_pages:
                        # Advance only when another page remains, then give
                        # the list the same fixed pause used after picking
                        # a grade.
                        click_element(next_xpath)
                        time.sleep(3)
    finally:
        # quit() ends the whole driver session; close() only closed the
        # current window and was skipped entirely on errors.
        browser.quit()
if __name__ == "__main__":
    # Start the scrape only when the file is executed as a script.
    main()