肝了一个晚上
这是自动查询 12306 车票并将查询结果写入 CSV 文件的代码
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import csv
import time
# Launch Chrome and open the 12306 remaining-ticket query page.
driver = webdriver.Chrome()
url = "https://kyfw.12306.cn/otn/leftTicket/init"
driver.get(url)

# CSV output; newline='' leaves line-ending handling to the csv module.
out = open('d:/ticket.csv', 'w', newline='')
csv_write = csv.writer(out, dialect='excel')

# Departure station, arrival station and travel date, read from stdin.
fromstation = input()
tostation = input()
month = int(input())  # 0 = query the current month, 1 = query next month
day = int(input())

# XPath locators for the page elements the script touches.
xfrom = '//*[@id="fromStationText"]'
xto = '//*[@id="toStationText"]'
xdate = '//*[@id="train_date"]'
xmonth = '//div[@class="cal-wrap"]//div[@class="cal-cm"]'
xbutton = '//*[@id="query_ticket"]'
xtable = '//*[@id="t-list"]/table'

# Automatically click the pop-up dialog.
xtanchuang = '//*[@id="qd_closeDefaultWarningWindowDialog_id"]'
# find_element("xpath", ...) is used instead of find_element_by_xpath, which
# was removed in Selenium 4.3; this form works on older releases as well.
driver.find_element("xpath", xtanchuang).click()
def inputstation(xPath, station):
    """Type a station name into a station input and click the matching entry.

    Args:
        xPath: XPath of the station text input to fill.
        station: Station name to type; also the exact text of the ``<span>``
            that is clicked afterwards.
    """
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    city = driver.find_element("xpath", xPath)
    ActionChains(driver).click(city).send_keys(station).perform()
    # The leading "//" makes this a document-wide search even though it is
    # issued from the input element.
    suggestion = city.find_element("xpath", '//span[text()="' + station + '"]')
    suggestion.click()
def inputdate(xMonth, xDate, startmonth, startdate):
    """Open the date picker and click the requested day.

    Args:
        xMonth: XPath matching the month panels of the calendar.
        xDate: XPath of the date input that opens the calendar when clicked.
        startmonth: Index of the month panel to use among the xMonth matches.
        startdate: Day of the month (int) whose cell is clicked.
    """
    # find_element(s)("xpath", ...) replaces the find_element(s)_by_xpath
    # helpers, which were removed in Selenium 4.3.
    driver.find_element("xpath", xDate).click()
    # Index with the startmonth parameter; the original read the module-level
    # ``month`` variable here and silently ignored the argument.
    month_panel = driver.find_elements("xpath", xMonth)[startmonth]
    # NOTE(review): the revised listing later in this file falls back to a
    # cell labelled "今天" when this lookup fails; this version does not.
    day_cell = month_panel.find_element("xpath", './/div[text()=%d]' % startdate)
    day_cell.click()
def mulu():
    """Write the result table's header row(s) to the CSV file.

    The header cells describe the ticket columns (e.g. train number,
    departure station). Every ``<tr>`` in the table's ``<thead>`` becomes one
    CSV row built from the text of its ``<th>`` cells.
    """
    thead_xpath = '//*[@id="t-list"]/table/thead'
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    thead_html = driver.find_element("xpath", thead_xpath).get_attribute('innerHTML')
    soup = BS(thead_html, 'html.parser')
    for header_row in soup.find_all('tr'):
        csv_write.writerow([th.text for th in header_row.find_all('th')])
def result():
    """Write the ticket information rows to the CSV file.

    One CSV row is written for every ``<tr>`` whose id starts with "ticket".

    Raises:
        ValueError: If a row's first cell has fewer than two '查看票价' entries.
        IndexError: If a row's first cell has fewer text items than expected.
    """
    # find_elements("xpath", ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3.
    rows = driver.find_elements("xpath", '//tr[starts-with(@id,"ticket")]')
    for row in rows:
        soup = BS(row.get_attribute('innerHTML'), "html.parser")
        tds = soup.find_all('td')
        # Text of every <a>/<strong>/<span> inside the first cell.
        train = [element.text for element in tds[0].find_all(['a', 'strong', 'span'])]
        # Drop the two '查看票价' entries; a missing one raises so that a
        # changed page layout is noticed instead of producing shifted columns.
        train.remove('查看票价')
        train.remove('查看票价')
        details = train[1:]
        lst = [train[0]]
        # Join the next six items pairwise into three columns
        # (presumably stations, times and duration - verify against the page).
        for i in range(0, 6, 2):
            lst.append(f'{details[i]} {details[i+1]}')
        # The remaining cells are written as-is.
        lst.extend([td.text for td in tds[1:]])
        csv_write.writerow(lst)
# Fill in the query form, run the query and dump the result table to CSV.
try:
    inputstation(xfrom, fromstation)
    inputstation(xto, tostation)
    inputdate(xmonth, xdate, month, day)
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    driver.find_element("xpath", xbutton).click()
    # Fixed pause; presumably gives the result table time to load.
    time.sleep(3)
    mulu()
    result()
finally:
    # Release the file and the browser even if a step above raised.
    out.close()
    # quit() ends the whole WebDriver session; close() only closed the window.
    driver.quit()
爬取结果如图
发布几天后,有位网友指出上面的代码存在一个 BUG:当要查询的日期恰好是当天时,日历中对应的格子显示的似乎是"今天"而不是数字,按数字查找会失败。在这里特别感谢他
修改后的代码如下
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import csv
import time
# Launch Chrome and open the 12306 remaining-ticket query page.
driver = webdriver.Chrome()
url = "https://kyfw.12306.cn/otn/leftTicket/init"
driver.get(url)

# CSV output; newline='' leaves line-ending handling to the csv module.
out = open('d:/ticket.csv', 'w', newline='')
csv_write = csv.writer(out, dialect='excel')

# Departure station, arrival station and travel date, read from stdin.
fromstation = input()
tostation = input()
month = int(input())  # 0 = query the current month, 1 = query next month
day = int(input())

# XPath locators for the page elements the script touches.
xfrom = '//*[@id="fromStationText"]'
xto = '//*[@id="toStationText"]'
xdate = '//*[@id="train_date"]'
xmonth = '//div[@class="cal-wrap"]//div[@class="cal-cm"]'
xbutton = '//*[@id="query_ticket"]'
xtable = '//*[@id="t-list"]/table'

# Automatically click the pop-up dialog.
xtanchuang = '//*[@id="qd_closeDefaultWarningWindowDialog_id"]'
# find_element("xpath", ...) is used instead of find_element_by_xpath, which
# was removed in Selenium 4.3; this form works on older releases as well.
driver.find_element("xpath", xtanchuang).click()
def inputstation(xPath, station):
    """Type a station name into a station input and click the matching entry.

    Args:
        xPath: XPath of the station text input to fill.
        station: Station name to type; also the exact text of the ``<span>``
            that is clicked afterwards.
    """
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    city = driver.find_element("xpath", xPath)
    ActionChains(driver).click(city).send_keys(station).perform()
    # The leading "//" makes this a document-wide search even though it is
    # issued from the input element.
    suggestion = city.find_element("xpath", '//span[text()="' + station + '"]')
    suggestion.click()
def inputdate(xMonth, xDate, startmonth, startdate):
    """Open the date picker and click the requested day.

    If no cell with the day number exists in the chosen month panel, the cell
    labelled "今天" is clicked instead.

    Args:
        xMonth: XPath matching the month panels of the calendar.
        xDate: XPath of the date input that opens the calendar when clicked.
        startmonth: Index of the month panel to use among the xMonth matches.
        startdate: Day of the month (int) whose cell is clicked.
    """
    # Imported locally so this function brings its own dependency into scope.
    from selenium.common.exceptions import NoSuchElementException

    # find_element(s)("xpath", ...) replaces the find_element(s)_by_xpath
    # helpers, which were removed in Selenium 4.3.
    driver.find_element("xpath", xDate).click()
    # Index with the startmonth parameter; the original read the module-level
    # ``month`` variable here and silently ignored the argument.
    month_panel = driver.find_elements("xpath", xMonth)[startmonth]
    try:
        day_cell = month_panel.find_element("xpath", './/div[text()=%d]' % startdate)
    except NoSuchElementException:
        # Only a missing element triggers the fallback; the original bare
        # ``except:`` also swallowed unrelated errors such as KeyboardInterrupt.
        day_cell = month_panel.find_element("xpath", './/div[text()="今天"]')
    day_cell.click()
def mulu():
    """Write the result table's header row(s) to the CSV file.

    The header cells describe the ticket columns (e.g. train number,
    departure station). Every ``<tr>`` in the table's ``<thead>`` becomes one
    CSV row built from the text of its ``<th>`` cells.
    """
    thead_xpath = '//*[@id="t-list"]/table/thead'
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    thead_html = driver.find_element("xpath", thead_xpath).get_attribute('innerHTML')
    soup = BS(thead_html, 'html.parser')
    for header_row in soup.find_all('tr'):
        csv_write.writerow([th.text for th in header_row.find_all('th')])
def result():
    """Write the ticket information rows to the CSV file.

    One CSV row is written for every ``<tr>`` whose id starts with "ticket".

    Raises:
        ValueError: If a row's first cell has fewer than two '查看票价' entries.
        IndexError: If a row's first cell has fewer text items than expected.
    """
    # find_elements("xpath", ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3.
    rows = driver.find_elements("xpath", '//tr[starts-with(@id,"ticket")]')
    for row in rows:
        soup = BS(row.get_attribute('innerHTML'), "html.parser")
        tds = soup.find_all('td')
        # Text of every <a>/<strong>/<span> inside the first cell.
        train = [element.text for element in tds[0].find_all(['a', 'strong', 'span'])]
        # Drop the two '查看票价' entries; a missing one raises so that a
        # changed page layout is noticed instead of producing shifted columns.
        train.remove('查看票价')
        train.remove('查看票价')
        details = train[1:]
        lst = [train[0]]
        # Join the next six items pairwise into three columns
        # (presumably stations, times and duration - verify against the page).
        for i in range(0, 6, 2):
            lst.append(f'{details[i]} {details[i+1]}')
        # The remaining cells are written as-is.
        lst.extend([td.text for td in tds[1:]])
        csv_write.writerow(lst)
# Fill in the query form, run the query and dump the result table to CSV.
try:
    inputstation(xfrom, fromstation)
    inputstation(xto, tostation)
    inputdate(xmonth, xdate, month, day)
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    driver.find_element("xpath", xbutton).click()
    # Fixed pause; presumably gives the result table time to load.
    time.sleep(3)
    mulu()
    result()
finally:
    # Release the file and the browser even if a step above raised.
    out.close()
    # quit() ends the whole WebDriver session; close() only closed the window.
    driver.quit()