爬虫之自动查询12306车票

最新推荐文章于 2022-05-29 21:24:21 发布

听说不挂科

最新推荐文章于 2022-05-29 21:24:21 发布

阅读量672

点赞数 3

分类专栏： python

本文链接：https://blog.csdn.net/qq_53029299/article/details/115098417

版权

python 专栏收录该内容

63 篇文章

订阅专栏

肝了一个晚上

下面是自动查询车票并将结果写入 CSV 文件的代码：

from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import csv
import time

# Launch Chrome and open the 12306 ticket query page.
url = "https://kyfw.12306.cn/otn/leftTicket/init"
driver = webdriver.Chrome()
driver.get(url)

# CSV output file; newline='' leaves line-ending handling to the csv module.
out = open('d:/ticket.csv', 'w', newline='')
csv_write = csv.writer(out, dialect='excel')

# Read the query from stdin, one value per line:
# departure station, arrival station, month offset, day.
fromstation = input()
tostation = input()
# Month offset: enter 0 to query the current month, 1 to query next month.
month = int(input())
day = int(input())

# XPath of each page element the script interacts with.
xfrom = '//*[@id="fromStationText"]'
xto = '//*[@id="toStationText"]'
xdate = '//*[@id="train_date"]'
xmonth = '//div[@class="cal-wrap"]//div[@class="cal-cm"]'
xbutton = '//*[@id="query_ticket"]'
xtable = '//*[@id="t-list"]/table'

# Automatically click the button that dismisses the popup dialog.
# find_element("xpath", ...) is used because the find_element_by_* helpers
# were removed in Selenium 4.3; this generic form works on Selenium 3 and 4.
xtanchuang = '//*[@id="qd_closeDefaultWarningWindowDialog_id"]'
driver.find_element("xpath", xtanchuang).click()

def inputstation(xPath, station):
    """Type a station name into a station input and click the matching entry.

    xPath: XPath of the station text input.
    station: station name to type; the <span> whose text equals this name
        exactly is then clicked (presumably the entry in the page's
        suggestion list -- confirm against the page).

    Selenium raises NoSuchElementException if either element is not found.
    """
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    city = driver.find_element("xpath", xPath)
    ActionChains(driver).click(city).send_keys(station).perform()
    # The expression starts with '//', so it searches the whole document even
    # when evaluated from an element; query the driver directly to make that
    # explicit, and keep the `station` argument unmodified.
    suggestion = driver.find_element("xpath", '//span[text()="' + station + '"]')
    suggestion.click()

def inputdate(xMonth, xDate, startmonth, startdate):
    """Click the date input, then click the requested day in the calendar.

    xMonth: XPath matching the calendar's month panels.
    xDate: XPath of the date input that is clicked first.
    startmonth: index of the month panel to use (0 for the current month,
        1 for next month).
    startdate: day number whose cell is clicked.

    Selenium raises NoSuchElementException if no cell has that day number.
    """
    # find_element(s)("xpath", ...) replaces the find_element(s)_by_xpath
    # helpers, which were removed in Selenium 4.3.
    driver.find_element("xpath", xDate).click()
    # Use the startmonth argument; the previous code indexed with the
    # module-level `month` and silently ignored this parameter.
    Month = driver.find_elements("xpath", xMonth)[startmonth]
    # NOTE(review): the calendar appears to label the current day "今天"
    # instead of its number, in which case this lookup fails for today's date.
    Day = Month.find_element("xpath", './/div[text()=%d]' % startdate)
    Day.click()
    
def mulu():
    """Write the result table's header to the CSV file.

    The header gives the meaning of each ticket column (e.g. train number,
    departure station). One CSV row is written per <tr> in the table's
    <thead>, with one field per <th>.
    """
    xpath_1 = '//*[@id="t-list"]/table/thead'
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    thead_html = driver.find_element("xpath", xpath_1).get_attribute('innerHTML')
    soup_1 = BS(thead_html, 'html.parser')
    for row in soup_1.find_all('tr'):
        csv_write.writerow([col.text for col in row.find_all('th')])

def result():
    """Write one CSV row of ticket information per result row on the page.

    For every <tr> whose id starts with "ticket", the CSV row consists of:
    the first text found in the first cell, up to three space-joined pairs
    built from the next six texts of that cell, then the text of every
    remaining cell. Rows without any usable cell text are skipped.
    """
    # find_elements("xpath", ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    rows = driver.find_elements("xpath", '//tr[starts-with(@id,"ticket")]')
    for row in rows:
        soup = BS(row.get_attribute('innerHTML'), "html.parser")
        tds = soup.find_all('td')
        if not tds:
            continue
        texts = (element.text for element in tds[0].find_all(['a', 'strong', 'span']))
        # Filter out the "查看票价" link text instead of calling remove() twice:
        # remove() raises ValueError when the text occurs fewer than two times.
        train = [text for text in texts if text != '查看票价']
        if not train:
            continue
        temp = train[1:]
        lst = [train[0]]
        # Pair items (0,1), (2,3), (4,5); zip stops early on a short row
        # rather than raising IndexError.
        lst.extend(f'{first} {second}' for first, second in zip(temp[0:6:2], temp[1:6:2]))
        lst.extend(td.text for td in tds[1:])
        csv_write.writerow(lst)
    
# Fill in the query form, run the query and save the result table to the CSV.
try:
    inputstation(xfrom, fromstation)
    inputstation(xto, tostation)
    inputdate(xmonth, xdate, month, day)
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    driver.find_element("xpath", xbutton).click()
    # Fixed pause before the result table is read.
    time.sleep(3)
    mulu()
    result()
finally:
    # Runs even if a step above raises, so the CSV file is always closed
    # and the browser is never left open.
    out.close()
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() only closes the current window.
    driver.quit()

爬取结果如图
在这里插入图片描述

发布几天后，有位网友指出上面的代码存在一个 BUG：当查询日期是当天时，日历中对应的格子显示的是“今天”而不是日期数字，按数字查找会失败。在这里特别感谢他！
修改后的代码如下：

from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import csv
import time

# Launch Chrome and open the 12306 ticket query page.
url = "https://kyfw.12306.cn/otn/leftTicket/init"
driver = webdriver.Chrome()
driver.get(url)

# CSV output file; newline='' leaves line-ending handling to the csv module.
out = open('d:/ticket.csv', 'w', newline='')
csv_write = csv.writer(out, dialect='excel')

# Read the query from stdin, one value per line:
# departure station, arrival station, month offset, day.
fromstation = input()
tostation = input()
# Month offset: enter 0 to query the current month, 1 to query next month.
month = int(input())
day = int(input())


# XPath of each page element the script interacts with.
xfrom = '//*[@id="fromStationText"]'
xto = '//*[@id="toStationText"]'
xdate = '//*[@id="train_date"]'
xmonth = '//div[@class="cal-wrap"]//div[@class="cal-cm"]'
xbutton = '//*[@id="query_ticket"]'
xtable = '//*[@id="t-list"]/table'

# Automatically click the button that dismisses the popup dialog.
# find_element("xpath", ...) is used because the find_element_by_* helpers
# were removed in Selenium 4.3; this generic form works on Selenium 3 and 4.
xtanchuang = '//*[@id="qd_closeDefaultWarningWindowDialog_id"]'
driver.find_element("xpath", xtanchuang).click()

def inputstation(xPath, station):
    """Type a station name into a station input and click the matching entry.

    xPath: XPath of the station text input.
    station: station name to type; the <span> whose text equals this name
        exactly is then clicked (presumably the entry in the page's
        suggestion list -- confirm against the page).

    Selenium raises NoSuchElementException if either element is not found.
    """
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    city = driver.find_element("xpath", xPath)
    ActionChains(driver).click(city).send_keys(station).perform()
    # The expression starts with '//', so it searches the whole document even
    # when evaluated from an element; query the driver directly to make that
    # explicit, and keep the `station` argument unmodified.
    suggestion = driver.find_element("xpath", '//span[text()="' + station + '"]')
    suggestion.click()

def inputdate(xMonth, xDate, startmonth, startdate):
    """Click the date input, then click the requested day in the calendar.

    xMonth: XPath matching the calendar's month panels.
    xDate: XPath of the date input that is clicked first.
    startmonth: index of the month panel to use (0 for the current month,
        1 for next month).
    startdate: day number whose cell is clicked; if no cell carries that
        number, the cell labelled "今天" is clicked instead.
    """
    # Imported locally so this function is self-contained; selenium is
    # already a dependency of this script.
    from selenium.common.exceptions import NoSuchElementException

    # find_element(s)("xpath", ...) replaces the find_element(s)_by_xpath
    # helpers, which were removed in Selenium 4.3.
    driver.find_element("xpath", xDate).click()
    # Use the startmonth argument; the previous code indexed with the
    # module-level `month` and silently ignored this parameter.
    Month = driver.find_elements("xpath", xMonth)[startmonth]
    try:
        Day = Month.find_element("xpath", './/div[text()=%d]' % startdate)
    except NoSuchElementException:
        # Only a failed lookup triggers the fallback (the bare `except:` also
        # swallowed unrelated errors such as KeyboardInterrupt). The fallback
        # targets the cell labelled "今天" (today).
        Day = Month.find_element("xpath", './/div[text()="今天"]')
    Day.click()
    
def mulu():
    """Write the result table's header to the CSV file.

    The header gives the meaning of each ticket column (e.g. train number,
    departure station). One CSV row is written per <tr> in the table's
    <thead>, with one field per <th>.
    """
    xpath_1 = '//*[@id="t-list"]/table/thead'
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    thead_html = driver.find_element("xpath", xpath_1).get_attribute('innerHTML')
    soup_1 = BS(thead_html, 'html.parser')
    for row in soup_1.find_all('tr'):
        csv_write.writerow([col.text for col in row.find_all('th')])

def result():
    """Write one CSV row of ticket information per result row on the page.

    For every <tr> whose id starts with "ticket", the CSV row consists of:
    the first text found in the first cell, up to three space-joined pairs
    built from the next six texts of that cell, then the text of every
    remaining cell. Rows without any usable cell text are skipped.
    """
    # find_elements("xpath", ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    rows = driver.find_elements("xpath", '//tr[starts-with(@id,"ticket")]')
    for row in rows:
        soup = BS(row.get_attribute('innerHTML'), "html.parser")
        tds = soup.find_all('td')
        if not tds:
            continue
        texts = (element.text for element in tds[0].find_all(['a', 'strong', 'span']))
        # Filter out the "查看票价" link text instead of calling remove() twice:
        # remove() raises ValueError when the text occurs fewer than two times.
        train = [text for text in texts if text != '查看票价']
        if not train:
            continue
        temp = train[1:]
        lst = [train[0]]
        # Pair items (0,1), (2,3), (4,5); zip stops early on a short row
        # rather than raising IndexError.
        lst.extend(f'{first} {second}' for first, second in zip(temp[0:6:2], temp[1:6:2]))
        lst.extend(td.text for td in tds[1:])
        csv_write.writerow(lst)
    
# Fill in the query form, run the query and save the result table to the CSV.
try:
    inputstation(xfrom, fromstation)
    inputstation(xto, tostation)
    inputdate(xmonth, xdate, month, day)
    # find_element("xpath", ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; the generic form also works on Selenium 3.
    driver.find_element("xpath", xbutton).click()
    # Fixed pause before the result table is read.
    time.sleep(3)
    mulu()
    result()
finally:
    # Runs even if a step above raises, so the CSV file is always closed
    # and the browser is never left open.
    out.close()
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() only closes the current window.
    driver.quit()