爬虫之自动查询12306车票

肝了一个晚上

这是自动查询车票并且写入csv的代码

from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import csv
import time

# Start a Chrome session and open the 12306 ticket-query page.
driver=webdriver.Chrome()
url="https://kyfw.12306.cn/otn/leftTicket/init"
driver.get(url)

# CSV output; newline='' lets the csv module control line endings itself.
out=open('d:/ticket.csv','w',newline='')
csv_write=csv.writer(out,dialect='excel')

# Departure station, arrival station and travel date, read from stdin in this order.
fromstation=input()
tostation=input()
# Enter 0 to query tickets in the current month, 1 for the next month
# (the value is used as an index into the calendar month panels).
month=int(input())
day=int(input())

# XPath of each page element the script interacts with.
xfrom='//*[@id="fromStationText"]'
xto='//*[@id="toStationText"]'
xdate='//*[@id="train_date"]'
xmonth='//div[@class="cal-wrap"]//div[@class="cal-cm"]'
xbutton='//*[@id="query_ticket"]'
# NOTE(review): xtable is not referenced anywhere else in the visible script.
xtable='//*[@id="t-list"]/table'

# Click the popup dialog's button automatically so the page can be used.
xtanchuang='//*[@id="qd_closeDefaultWarningWindowDialog_id"]'
driver.find_element_by_xpath(xtanchuang).click()

def inputstation(xPath,station):
    """Type a station name into a station box and click the matching suggestion.

    xPath   -- XPath of the station text input.
    station -- station name to type; it is also the exact text of the
               <span> suggestion that gets clicked.
    """
    box = driver.find_element_by_xpath(xPath)

    # Click the input, then send the station name as keystrokes.
    typing = ActionChains(driver)
    typing.click(box)
    typing.send_keys(station)
    typing.perform()

    # The expression starts with '//', so it is not restricted to
    # descendants of the input element.
    suggestion_xpath = '//span[text()="' + station + '"]'
    box.find_element_by_xpath(suggestion_xpath).click()

def inputdate(xMonth,xDate,startmonth,startdate):
    """Open the date picker and click the requested day.

    xMonth     -- XPath matching the calendar month panels.
    xDate      -- XPath of the date input that opens the picker.
    startmonth -- index of the month panel to use (the script passes 0 for
                  the current month, 1 for the next one).
    startdate  -- day of the month, as an integer.
    """
    driver.find_element_by_xpath(xDate).click()
    # Index with the startmonth argument. The previous code read the
    # module-level `month` variable here and silently ignored the parameter.
    month_panel = driver.find_elements_by_xpath(xMonth)[startmonth]
    day_cell = month_panel.find_element_by_xpath('.//div[text()=%d]' % startdate)
    day_cell.click()
    
# Write the meaning of each ticket field (train number, departure station, ...) to the CSV.
def mulu():
    """Copy every header row of the result table into the CSV file.

    Reads the <thead> of the table under the element with id "t-list" and
    writes one CSV row per <tr>, using the text of its <th> cells.
    """
    head_element = driver.find_element_by_xpath('//*[@id="t-list"]/table/thead')
    head_html = head_element.get_attribute('innerHTML')
    for header_row in BS(head_html, 'html.parser').find_all('tr'):
        csv_write.writerow([cell.text for cell in header_row.find_all('th')])

# Ticket information.
def result():
    """Write one CSV row for every ticket row of the result table.

    A ticket row is a <tr> whose id starts with "ticket". From its first
    cell the texts of all <a>/<strong>/<span> tags are collected and two
    '查看票价' entries are dropped. The first remaining text is kept on its
    own (presumably the train number) and the next six are joined pairwise
    with a space. The text of every other cell is appended unchanged.

    Raises ValueError if '查看票价' occurs fewer than twice in the first
    cell, and IndexError if fewer than seven texts remain afterwards.
    """
    for ticket_row in driver.find_elements_by_xpath('//tr[starts-with(@id,"ticket")]'):
        cells = BS(ticket_row.get_attribute('innerHTML'), "html.parser").find_all('td')
        texts = [tag.text for tag in cells[0].find_all(['a', 'strong', 'span'])]

        # Drop exactly two occurrences of the price-link text.
        for _ in range(2):
            texts.remove('查看票价')

        details = texts[1:]
        record = [texts[0]]
        # Merge the six detail texts into three "first second" columns.
        record += [f'{details[k]} {details[k+1]}' for k in (0, 2, 4)]
        record += [cell.text for cell in cells[1:]]
        csv_write.writerow(record)
    
# Fill in the query form, run the search and dump the result table to the CSV.
try:
    inputstation(xfrom,fromstation)
    inputstation(xto,tostation)
    inputdate(xmonth,xdate,month,day)
    driver.find_element_by_xpath(xbutton).click()
    # Fixed pause before reading the table (presumably to let the results load).
    time.sleep(3)
    mulu()
    result()
finally:
    # Release the CSV handle and the browser even when a step above raises;
    # previously a failure left the file unflushed and the browser running.
    try:
        out.close()
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        driver.quit()

爬取结果如图
在这里插入图片描述

发布几天后有位网友指出上面代码存在的BUG:当要查询的日期恰好是当天时,日历中对应的格子显示的是“今天”而不是数字,inputdate 按数字查找就会找不到元素。在这里特别感谢他
修改后的代码如下

from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import csv
import time

# Start a Chrome session and open the 12306 ticket-query page.
driver=webdriver.Chrome()
url="https://kyfw.12306.cn/otn/leftTicket/init"
driver.get(url)

# CSV output; newline='' lets the csv module control line endings itself.
out=open('d:/ticket.csv','w',newline='')
csv_write=csv.writer(out,dialect='excel')

# Departure station, arrival station and travel date, read from stdin in this order.
fromstation=input()
tostation=input()
# Enter 0 to query tickets in the current month, 1 for the next month
# (the value is used as an index into the calendar month panels).
month=int(input())
day=int(input())


# XPath of each page element the script interacts with.
xfrom='//*[@id="fromStationText"]'
xto='//*[@id="toStationText"]'
xdate='//*[@id="train_date"]'
xmonth='//div[@class="cal-wrap"]//div[@class="cal-cm"]'
xbutton='//*[@id="query_ticket"]'
# NOTE(review): xtable is not referenced anywhere else in the visible script.
xtable='//*[@id="t-list"]/table'

# Click the popup dialog's button automatically so the page can be used.
xtanchuang='//*[@id="qd_closeDefaultWarningWindowDialog_id"]'
driver.find_element_by_xpath(xtanchuang).click()

def inputstation(xPath,station):
    """Type a station name into a station box and click the matching suggestion.

    xPath   -- XPath of the station text input.
    station -- station name to type; it is also the exact text of the
               <span> suggestion that gets clicked.
    """
    box = driver.find_element_by_xpath(xPath)

    # Click the input, then send the station name as keystrokes.
    typing = ActionChains(driver)
    typing.click(box)
    typing.send_keys(station)
    typing.perform()

    # The expression starts with '//', so it is not restricted to
    # descendants of the input element.
    suggestion_xpath = '//span[text()="' + station + '"]'
    box.find_element_by_xpath(suggestion_xpath).click()

def inputdate(xMonth,xDate,startmonth,startdate):
    """Open the date picker and click the requested day.

    xMonth     -- XPath matching the calendar month panels.
    xDate      -- XPath of the date input that opens the picker.
    startmonth -- index of the month panel to use (the script passes 0 for
                  the current month, 1 for the next one).
    startdate  -- day of the month, as an integer.

    If no cell with the numeric day exists in the chosen panel, the cell
    whose text is "今天" is clicked instead (presumably the picker labels
    the current day with that text rather than its number).
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of the script.
    from selenium.common.exceptions import NoSuchElementException

    driver.find_element_by_xpath(xDate).click()
    # Index with the startmonth argument. The previous code read the
    # module-level `month` variable here and silently ignored the parameter.
    month_panel = driver.find_elements_by_xpath(xMonth)[startmonth]
    # Built outside the try so a formatting error is not mistaken for a
    # missing element.
    day_xpath = './/div[text()=%d]' % startdate
    try:
        day_cell = month_panel.find_element_by_xpath(day_xpath)
    except NoSuchElementException:
        # Only a missing element triggers the fallback; the former bare
        # `except:` also hid unrelated errors.
        day_cell = month_panel.find_element_by_xpath('.//div[text()="今天"]')
    day_cell.click()
    
# Write the meaning of each ticket field (train number, departure station, ...) to the CSV.
def mulu():
    """Copy every header row of the result table into the CSV file.

    Reads the <thead> of the table under the element with id "t-list" and
    writes one CSV row per <tr>, using the text of its <th> cells.
    """
    head_element = driver.find_element_by_xpath('//*[@id="t-list"]/table/thead')
    head_html = head_element.get_attribute('innerHTML')
    for header_row in BS(head_html, 'html.parser').find_all('tr'):
        csv_write.writerow([cell.text for cell in header_row.find_all('th')])

# Ticket information.
def result():
    """Write one CSV row for every ticket row of the result table.

    A ticket row is a <tr> whose id starts with "ticket". From its first
    cell the texts of all <a>/<strong>/<span> tags are collected and two
    '查看票价' entries are dropped. The first remaining text is kept on its
    own (presumably the train number) and the next six are joined pairwise
    with a space. The text of every other cell is appended unchanged.

    Raises ValueError if '查看票价' occurs fewer than twice in the first
    cell, and IndexError if fewer than seven texts remain afterwards.
    """
    for ticket_row in driver.find_elements_by_xpath('//tr[starts-with(@id,"ticket")]'):
        cells = BS(ticket_row.get_attribute('innerHTML'), "html.parser").find_all('td')
        texts = [tag.text for tag in cells[0].find_all(['a', 'strong', 'span'])]

        # Drop exactly two occurrences of the price-link text.
        for _ in range(2):
            texts.remove('查看票价')

        details = texts[1:]
        record = [texts[0]]
        # Merge the six detail texts into three "first second" columns.
        record += [f'{details[k]} {details[k+1]}' for k in (0, 2, 4)]
        record += [cell.text for cell in cells[1:]]
        csv_write.writerow(record)
    
# Fill in the query form, run the search and dump the result table to the CSV.
try:
    inputstation(xfrom,fromstation)
    inputstation(xto,tostation)
    inputdate(xmonth,xdate,month,day)
    driver.find_element_by_xpath(xbutton).click()
    # Fixed pause before reading the table (presumably to let the results load).
    time.sleep(3)
    mulu()
    result()
finally:
    # Release the CSV handle and the browser even when a step above raises;
    # previously a failure left the file unflushed and the browser running.
    try:
        out.close()
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        driver.quit()
评论 10
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值