from lxml import etree
from lxml import etree
import re
import time
import json
import requests
import csv
import codecs
from pyquery import PyQuery as pq
from mouse import move,click
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import time
import csv
import random
from pyquery import PyQuery as pq
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
with open("school.json",encoding="utf-8-sig") as fp:
datas=json.load(fp)
path = 'D:\Exam\chromedriver.exe'
browser = webdriver.Chrome(executable_path=path)
for l in datas:
a=list(l.keys())[0]
school_name=l.get(a)
print(a,school_name)
url = school_name+"/provinceline"
browser.get(url)
browser.maximize_window()
time.sleep(3.5)
browser.find_element_by_xpath('//*[@id="root"]/div/div/div/div/div/div[1]/div[1]/div[3]/div/div[2]/p').click()
time.sleep(3.5)
#//*[@id="root"]/div/div/div/div/div/div/div[1]/div[3]/div/div[2]/p
browser.find_element_by_link_text("太原市").click()
time.sleep(3.5)
source = browser.page_source
tree=etree.HTML(source)
div_list=tree.xpath('//*[@id="root"]/div/div[1]/div/div/div/div[1]/div[3]/div[1]/div[1]/div/div[1]/div[2]/div[1]/table/tbody/tr')
for div in div_list:
try:
if len(div.xpath('./td/text()'))==0:
continue
data=div.xpath('./td/text()')
li=[]
li.append(a)
li.append("理科")
li.append(data[0])
li.append(data[1])
li.append(data[2])
li.append(data[3].split("/")[0])
li.append(data[3].split("/")[1])
li.append(data[4])
li.append(data[5])
print(li)
with open('new山西省.csv', 'a', newline='', encoding='utf-8-sig') as f:
writer = csv.writer(f)
writer.writerow(li)
except:pass
move(754,691)
click()
move(730,721)
click()
time.sleep(2)
source = browser.page_source
tree=etree.HTML(source)
div_list=tree.xpath('//*[@id="root"]/div/div[1]/div/div/div/div[1]/div[3]/div[1]/div[1]/div/div[1]/div[2]/div[1]/table/tbody/tr')
for div in div_list:
try:
if len(div.xpath('./td/text()'))==0:
continue
data=div.xpath('./td/text()')
li=[]
li.append(a)
li.append("理科")
li.append(data[0])
li.append(data[1])
li.append(data[2])
li.append(data[3].split("/")[0])
li.append(data[3].split("/")[1])
li.append(data[4])
li.append(data[5])
print(li)
with open('new山西省.csv', 'a', newline='', encoding='utf-8-sig') as f:
writer = csv.writer(f)
writer.writerow(li)
except:pass
move(862,695)
click()
move(822,758)
click()
time.sleep(2)
source = browser.page_source
tree=etree.HTML(source)
div_list=tree.xpath('//*[@id="root"]/div/div[1]/div/div/div/div[1]/div[3]/div[1]/div[1]/div/div[1]/div[2]/div[1]/table/tbody/tr')
for div in div_list:
try:
if len(div.xpath('./td/text()'))==0:
continue
data=div.xpath('./td/text()')
li=[]
li.append(a)
li.append("文科")
li.append(data[0])
li.append(data[1])
li.append(data[2])
li.append(data[3].split("/")[0])
li.append(data[3].split("/")[1])
li.append(data[4])
li.append(data[5])
print(li)
with open('new山西省.csv', 'a', newline='', encoding='utf-8-sig') as f:
writer = csv.writer(f)
writer.writerow(li)
except:pass
move(751,694)
click()
move(731,749)
click()
move(862,695)
click()
move(822,758)
click()
time.sleep(2)
source = browser.page_source
tree=etree.HTML(source)
div_list=tree.xpath('//*[@id="root"]/div/div[1]/div/div/div/div[1]/div[3]/div[1]/div[1]/div/div[1]/div[2]/div[1]/table/tbody/tr')
for div in div_list:
try:
if len(div.xpath('./td/text()'))==0:
continue
data=div.xpath('./td/text()')
li=[]
li.append(a)
li.append("文科")
li.append(data[0])
li.append(data[1])
li.append(data[2])
li.append(data[3].split("/")[0])
li.append(data[3].split("/")[1])
li.append(data[4])
li.append(data[5])
print(li)
with open('new山西省.csv', 'a', newline='', encoding='utf-8-sig') as f:
writer = csv.writer(f)
writer.writerow(li)
except:pass
time.sleep(random.randint(4,8))
# --- leftover blog-page text from the copy/paste source (not code) ---
# 高考数据获取 ("college entrance exam data acquisition" — article title)
# 最新推荐文章于 2024-05-19 23:31:32 发布 (blog publication metadata)