Python爬虫练手项目-------- 12306抢票小程序(selenium+requests+BeautifulSoup)

前言

学习完Python基本语法,以及selenium,requests和BeautifulSoup三个库之后,做了一个简单的12306抢票小程序,功能实现得有些粗糙,结构也不是很清晰。

附上源码:

from selenium import webdriver
from twilio.rest import Client
from selenium.webdriver import ActionChains
import requests
from bs4 import BeautifulSoup
import time
import base64
import sys
import re


def login(num):
    """Click the captcha tiles whose positions are listed in *num*.

    Positions 1-4 are clicked on the upper row and 5-8 on the lower row;
    every click is placed at an offset relative to the page element with
    class ``lgcode-refresh``.  Uses the module-level ``web`` driver.

    Args:
        num: Iterable of integer tile positions (1-8).

    Any failure while clicking is reported on stdout instead of being
    raised, so the caller can retry the whole captcha.
    """
    element = web.find_element_by_class_name("lgcode-refresh")
    try:
        for position in num:
            # Both rows share the same horizontal layout; only the vertical
            # offset differs (70 for tiles 1-4, 140 for tiles 5-8).
            if position <= 4:
                column, y_offset = position, 70
            else:
                column, y_offset = position - 4, 140
            x_offset = 20 - 80 * (4 - column)
            ActionChains(web).move_to_element_with_offset(element, x_offset, y_offset).click().perform()
            time.sleep(3)
    except Exception:
        # Was a bare ``except:``, which also trapped Ctrl-C and sys.exit().
        print("验证码自动点击失败,请重启程序!!!!")
          
     
     
def get_num():
    """Return the captcha tile positions to click, as a list of ints.

    The captcha image is read from the ``src`` data URI of the
    ``J-loginImg`` element on the current page, saved as ``验证码.png`` and
    uploaded to the recognition service.  If anything in the automatic path
    fails, the user is asked to type the positions by hand.

    Returns:
        list[int]: tile positions (1-4 first row, 5-8 second row, as stated
        in the manual-input prompt).
    """
    page = BeautifulSoup(web.page_source, "html.parser")
    data_uri = page.findAll("img", {"id": "J-loginImg"})[0].get("src")
    encoded = data_uri.split(',')[1]
    # ``-len % 4`` pads to a multiple of four and adds nothing when the
    # payload is already aligned (the old formula appended "====" then).
    image_bytes = base64.urlsafe_b64decode(encoded + '=' * (-len(encoded) % 4))
    with open("验证码.png", "wb") as f:
        f.write(image_bytes)

    try:
        # Keep the upload handle inside ``with`` so it is always closed.
        with open('验证码.png', 'rb') as image_file:
            r = requests.post("http://littlebigluo.qicp.net:47720/", files={'file': image_file}, timeout=5)
        answer = BeautifulSoup(r.text, "html.parser").findAll("b")[0].string
        # Built in one expression: a failure half-way leaves no partial
        # result behind to be mixed with the manual answer below.
        return [int(ch) for ch in answer if ch != ' ']
    except Exception:
        # Best-effort third-party service: any failure falls back to the user.
        print("自动识别失败,请手动打开验证码手动输入照片位置(第一排从左到右依次为1~4,第二排从左到右依次为5~8)!!")
        img_location = input("请输入,输入示例:1,2,3  ")
        # Keep only valid tile digits so separators and spaces cannot crash int().
        return [int(ch) for ch in img_location if ch in "12345678"]

def get_Ticket_information():
    """Prompt for the trip details and return them as a dict.

    The user is first asked whether the trip is one-way ("单程"); any other
    answer is treated as a round trip.  For a one-way trip the return date
    is set to the departure date.

    Returns:
        dict[str, str]: keys "出发地", "目的地", "出发日", "返程日", "票型"
        and "单程" ("Yes" or "No").
    """
    print("*****************************************************")
    print("*****************请填写购票信息************************")
    print()
    choose = input("单程还是往返:")

    # Questions common to both trip kinds, asked in the original order.
    Ticket_information = {
        "出发地": input("请输入出发地:"),
        "目的地": input("请输入目的地:"),
        "出发日": input("请输入出发日(格式 2018-11-20):"),
    }
    if choose == "单程":
        Ticket_information["票型"] = input("请输入学生票或者普通票:")
        Ticket_information["单程"] = "Yes"
        # Was ``==`` (a comparison), which raised KeyError instead of
        # storing the return date.
        Ticket_information["返程日"] = Ticket_information["出发日"]
    else:
        Ticket_information["返程日"] = input("请输入返程日(格式 2018-11-20):")
        Ticket_information["票型"] = input("请输入学生票或者普通票:")
        Ticket_information["单程"] = "No"

    print("****************************************************")
    for key, value in Ticket_information.items():
        print(key, ":", value)
    print("****************************************************")

    return Ticket_information

                                


def tx_ticket(Tickct_information, cookies):
    """Open the ticket search page, load the search cookies and pick the ticket type.

    Args:
        Tickct_information: Trip dict; only the "票型" entry is read here,
            and it is rewritten to "普通票" when the user falls back to an
            ordinary ticket.  (The misspelt parameter name is kept so
            existing callers are unaffected.)
        cookies: Iterable of cookie dicts passed to ``web.add_cookie``.

    Side effects:
        Sets the module-level ``what_ticket`` flag to 1 when the student
        option was actually ticked, otherwise 0.  Quits the browser and
        exits the process if the user declines the ordinary-ticket fallback.
    """
    global what_ticket
    # The body used to read an undefined name ``Ticket_information``;
    # bind the argument to a correctly spelt local instead.
    ticket_info = Tickct_information

    time.sleep(1)
    web.find_element_by_link_text(u"车票").click()
    web.find_element_by_link_text(u"单程").click()
    for cookie in cookies:
        web.add_cookie(cookie)
    web.refresh()

    what_ticket = 0
    if ticket_info["票型"] == "学生票":
        student_option = web.find_element_by_id("sf2")
        if student_option.is_enabled():
            student_option.click()
            what_ticket = 1
        else:
            print("该日期无法使用学生票,具体时间请上12306官网查询。")
            answer = None
            while answer is None:
                try:
                    answer = int(input("如需继续购买普通票,请输入1,退出请输入0:"))
                except ValueError:
                    # Not a number: ask again rather than crash.
                    answer = None
            if answer:
                ticket_info["票型"] = "普通票"
            else:
                web.quit()
                sys.exit(0)
    print("正在打印余票信息!!!!!!!!!!!!!")

def get_cookie(Ticket_information):
    """Build the four ``_jc_save_*`` cookies that carry the search parameters.

    The station list is downloaded from 12306 and used to translate the
    departure and destination names into their station codes.

    Args:
        Ticket_information: Trip dict; reads "出发地", "目的地", "出发日"
            and "返程日".

    Returns:
        list[dict]: cookie dicts for ``_jc_save_toStation``,
        ``_jc_save_fromDate``, ``_jc_save_fromStation`` and
        ``_jc_save_toDate``, in that order.

    Raises:
        ValueError: if a station name is not present in the downloaded list
            (previously this surfaced as an UnboundLocalError).
    """
    response = requests.get(
        "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9018",
        timeout=10,
    )
    entries = response.text.split("@")
    time.sleep(1)

    # Entries are "|"-separated; field 1 is used as the name and field 2 as
    # the code.  The first chunk (before the first "@") is skipped.
    station_information = {}
    for entry in entries[1:]:
        fields = entry.split("|")
        if len(fields) > 2:
            station_information[fields[1]] = fields[2]

    origin = Ticket_information["出发地"]
    destination = Ticket_information["目的地"]
    for name in (origin, destination):
        if name not in station_information:
            raise ValueError("未找到车站: %s" % name)

    def _escape(name):
        # "\uXXXX" escapes rewritten as "%uXXXX", as in the original cookie values.
        return name.encode("unicode_escape").decode("utf-8").replace("\\", "%")

    # The old fixed expiry (1574778226, i.e. November 2019) is in the past;
    # compute one relative to now instead.
    expiry = int(time.time()) + 365 * 24 * 3600

    def _cookie(name, value):
        return {'domain': 'kyfw.12306.cn', 'expiry': expiry, 'httpOnly': False,
                'name': name, 'path': '/', 'secure': False, 'value': value}

    cookies = [
        _cookie('_jc_save_toStation', _escape(destination) + "%2C" + station_information[destination]),
        _cookie('_jc_save_fromDate', Ticket_information["出发日"]),
        _cookie('_jc_save_fromStation', _escape(origin) + "%2C" + station_information[origin]),
        _cookie('_jc_save_toDate', Ticket_information["返程日"]),
    ]
    return cookies
     
def _seat_column(html, title, id_pattern, nested=False):
    """Return one seat-class column: *title* followed by the text of each matching ``<td>``.

    With ``nested=True`` the text is taken from the cell's ``<div>`` unless
    the cell itself reads '--' or "有".
    """
    column = [title]
    for cell in html.findAll("td", {"id": re.compile(id_pattern)}):
        text = cell.string
        if nested and text not in ('--', "有") and cell.div is not None:
            text = cell.div.string
        column.append(text)
    return column


def _cell_at(column, index):
    """Return ``column[index]``, or an empty string when the column is too short."""
    return column[index] if index < len(column) else ""


def print_Ticket():
    """Print the remaining-ticket table from the current page and return the train map.

    Each printed line is tab-separated: train code, departure——arrival
    stations, departure——arrival times, duration, then one field per seat
    class.  The title line is repeated after every tenth train.

    Returns:
        dict: train code -> the part of the link's ``id`` before the first
        "_".  The first entry is the header key "车次" with an empty value,
        kept for compatibility with existing callers.
    """
    html = BeautifulSoup(web.page_source, "html.parser")

    train_number = {"车次": ""}
    for link in html.findAll("a", {"class": "number"}):
        train_number[link.string] = link["id"].split("_")[0]

    go_station, to_station = ["出发站"], ["到达站"]
    for box in html.findAll("div", {"class": "cdz"}):
        go_station.append(box.strong.string)
        to_station.append(box.strong.next_sibling.string)

    go_time, to_time = ["出发时间"], ["到达时间"]
    for box in html.findAll("div", {"class": "cds"}):
        go_time.append(box.strong.string)
        to_time.append(box.strong.next_sibling.string)

    driving_time = ["历时"]
    for box in html.findAll("div", {"class": "ls"}):
        driving_time.append(box.strong.string)

    # Seat columns in display order; the id patterns are unchanged.
    seat_columns = [
        _seat_column(html, "商务座", r'^TZ_\w.|SWZ_\w.', nested=True),
        _seat_column(html, "一等座", r'^ZY_\w.', nested=True),
        _seat_column(html, "二等座", r'^ZE_\w.', nested=True),
        _seat_column(html, "高级软卧", r'^GR_\w.'),
        _seat_column(html, "软卧", r'^RW_\w.'),
        _seat_column(html, "动卧", r'^SRRB_\w.'),
        _seat_column(html, "硬卧", r'^YW_\w.'),
        _seat_column(html, "软座", r'^RZ_\w.'),
        _seat_column(html, "硬座", r'^YZ_\w.'),
        _seat_column(html, "无座", r'^WZ_\w.'),
        _seat_column(html, "其他", r'^QT_\w.'),
    ]

    print("*******************余票信息**************************")
    header_line = None
    for i, code in enumerate(train_number):
        # Short columns are padded with "" instead of raising IndexError.
        fields = [
            code,
            "%s——%s" % (_cell_at(go_station, i), _cell_at(to_station, i)),
            "%s——%s" % (_cell_at(go_time, i), _cell_at(to_time, i)),
            _cell_at(driving_time, i),
        ]
        fields.extend(_cell_at(column, i) for column in seat_columns)
        line = "\t".join(str(field) for field in fields)
        if i == 0:
            # Row 0 is made of the column titles.  Reusing it as the repeated
            # header replaces the old hand-typed copy, which said "硬作为"
            # where "硬座" was meant.
            header_line = line
        print(line)
        print("\n")
        if i % 10 == 0 and i != 0:
            print("\t")
            print(header_line, end='\n\n')
    return train_number

def booking_ticket(train_number):
    """Ask which train to book and click its booking link.

    Args:
        train_number: Mapping (or other container) of known train codes, as
            returned by ``print_Ticket``.

    Returns:
        str: the row id ``"ticket_<id>"`` of the train whose booking link
        was clicked.

    The function loops until a booking link is clicked.  It exits the
    process when the user types OUT, or when ``qiang_ticket`` reports a
    successful grab.
    """
    want_train_number = input("请输入想要预定的车次(如:C2608,字母用大写):")
    while True:
        anchor = None
        if want_train_number in train_number:
            anchor = BeautifulSoup(web.page_source, "html.parser").find('a', string=want_train_number)
        if anchor is None:
            # Unknown code, or a key such as the "车次" header that has no
            # link on the page (used to crash with a TypeError).
            want_train_number = input("没有查找到相应车次!请输入正确的车次(输入'OUT'结束):")
            if want_train_number == "OUT":
                web.quit()  # was ``web.quit`` without the call parentheses
                sys.exit(0)
            continue

        train_id = "ticket_" + anchor['id'].split("_")[0]
        try:
            web.find_element_by_xpath("//tr[@id = '%s']/td[@align = 'center']/a" % train_id).click()
        except Exception:
            # The click failing is taken to mean there is no ticket left.
            QP_choose = input("该车次暂无余票,请输入YES/NO/OUT(YES-抢该车次的票,NO-选择其他车次,OUT-退出程序)")
        else:
            return train_id

        if QP_choose == "OUT":
            web.quit()  # was ``web.quit`` without the call parentheses
            sys.exit(0)
        if QP_choose == "YES":
            if qiang_ticket(train_id):
                sys.exit(0)
            print("抢票失败。。。")
        # NO, a failed grab or an unrecognised answer: ask for another train
        # instead of falling through to the misleading "not found" prompt.
        want_train_number = input("请输入想要预定的车次(如:C2608,字母用大写):")


def _split_names(raw):
    """Split a passenger list on ASCII or full-width commas, dropping blanks."""
    return [name.strip() for name in re.split("[,,]", raw) if name.strip()]


def _offer_grab(train_id):
    """Ask whether to start grabbing tickets for *train_id* and do so on YES."""
    if input("所选坐席没有余票,抢票-YES,退出-OUT") == "YES":
        if not qiang_ticket(train_id):
            print("抢票失败。。。")


def buy_ticket(train_id):
    """Fill in and submit the order on the passenger page.

    Prints the trip summary, asks for the seat class, selects it in the
    ``seatType_1`` drop-down, asks for the passenger names, ticks the
    matching passengers, submits the order and confirms it.

    Args:
        train_id: Row id of the chosen train; only passed on to
            ``qiang_ticket`` when the wanted seat is not available.

    Reads the module-level ``what_ticket`` flag to decide whether student
    passengers are asked for.  May quit the browser and exit the process
    when the user gives up after a passenger-count mismatch.
    """
    print("-----------------------------------------------------------")
    html = BeautifulSoup(web.page_source, "html.parser")
    # The old try/except retried the very same expression on the same parsed
    # page, so it could not succeed where the first attempt failed.
    ticket_head = html.findAll("p", {"id": "ticket_tit_id"})[0].text
    print(ticket_head)
    for paragraph in html.findAll("p", {"id": "ticket_con_id"}):
        print(paragraph.text, end="\t")
    print(html.findAll("p", {"style": "color: #3177BF;"})[0].text)
    print("**************************************************")
    seatType = input("选择输入购买的坐席(购买无座时请确认无座有余票):")

    # Fallback option values for seats whose <option> carries no value attribute.
    seat_1 = {"硬卧": "3", "硬座": "1", "软卧": "4", "二等座": "O", "一等座": "M", "特等座": "P", "商务座": "P"}
    # Seat classes offered by the page, mapped to their own option values.
    option_values = {}
    for option in html.findAll("select", {"id": "seatType_1"})[0].findAll("option"):
        label = (option.string or "").strip().split("(")[0]
        option_values[label] = option.get("value")
    able_seat = ["无座"] + list(option_values)

    if seatType not in able_seat:
        _offer_grab(train_id)
        return

    if seatType == "无座":
        # Was ``("二等座" or "硬座") in able_seat``, which only tested "二等座".
        if "二等座" in able_seat or "硬座" in able_seat:
            try:
                # A standing ticket is bought through the hard-seat option ...
                web.find_element_by_xpath("//select[@id = 'seatType_1']/option[@value = '1']").click()
            except Exception:
                # ... or through second class when there is no hard-seat option.
                web.find_element_by_xpath("//select[@id = 'seatType_1']/option[@value = 'O']").click()
        else:
            _offer_grab(train_id)
            return  # do not carry on with the order form after the grab prompt
    else:
        value = option_values.get(seatType) or seat_1.get(seatType)
        if value is None:
            print("无法选择坐席: %s" % seatType)
            return
        web.find_element_by_xpath("//select[@id = 'seatType_1']/option[@value = '%s']" % value).click()

    print("请选择乘客信息(最多5位),如需添加/修改乘客信息,请到12306官网操作:")
    web.implicitly_wait(30)
    while True:
        html = BeautifulSoup(web.page_source, "html.parser")
        print(html.findAll("ul", {"id": "normal_passenger_id"})[0].text)
        pt_person = _split_names(input("输入购买普通票的乘客多位乘客中间用中文输入法下的 ,隔开"))
        xs_person = []
        if what_ticket:
            xs_person = _split_names(input("输入购买学生票的乘客,多位乘客中间用中文输入法下的 ,隔开"))

        if not panduan_how_many(seatType, len(xs_person) + len(pt_person)):
            if input("余票不足,请重新选择乘客,或者退出(YES/NO)") == "NO":
                web.quit()
                sys.exit(0)
            continue

        # Tick every requested passenger.  If the click fails, the student
        # dialog is answered with OK for student passengers and Cancel for
        # ordinary ones, as in the original branches.
        for label in html.findAll("label"):
            name = label.text.split("(")[0]
            if name in xs_person:
                dialog_button = "dialog_xsertcj_ok"
            elif name in pt_person:
                dialog_button = "dialog_xsertcj_cancel"
            else:
                continue
            checkbox_id = label.get("for")
            if not checkbox_id:
                continue
            try:
                web.find_element_by_id(checkbox_id).click()
            except Exception:
                web.find_element_by_id(dialog_button).click()
        # Submit once, after all passengers are ticked (it used to be
        # clicked inside the loop, once per label).
        web.find_element_by_id("submitOrder_id").click()

        print("--------------------------------------------------------------")
        print("-----------------------正在打印乘客信息-------------------------")
        time.sleep(2)
        html = BeautifulSoup(web.page_source, "html.parser")
        for row in html.findAll("tbody", {"id": "check_ticketInfo_id"})[0].findAll("tr"):
            # One line per passenger: newlines become tabs, spaces are dropped.
            print(row.text.strip().replace("\n", "\t").replace(" ", ""))
        # Confirm once, after the summary (it used to be clicked once per row).
        web.find_element_by_id("qr_submit_id").click()
        break
         
          







def panduan_how_many(seatType, person_num):
    """Tell whether *person_num* tickets of class *seatType* can be ordered.

    The remaining count is read from the ``ticket_status_id`` spans of the
    current page: the text after the first ")" and before "张".

    Args:
        seatType: Seat class name, e.g. "硬卧" or "无座".
        person_num: Number of passengers selected.

    Returns:
        int: 1 when the order can proceed, 0 otherwise (a reason is printed).
    """
    # Enforce the 5-passenger limit first; it used to be checked only after
    # the availability test had already returned success.
    if person_num > 5:
        print("乘客人数超过5个")
        return 0

    html = BeautifulSoup(web.page_source, "html.parser")
    # Index of each seat class among the "ticket_status_id" spans.
    seat_2 = {"硬卧": 1, "硬座": 2, "软卧": 0, "二等座": 2, "一等座": 1, "特等座": 0, "商务座": 0, "无座": 3}
    spans = html.findAll("span", {"id": "ticket_status_id"})
    try:
        ticket_num = spans[seat_2[seatType]].text.split(")")[1].split("张")[0]
    except (KeyError, IndexError):
        # Unknown seat class, or the page does not have the expected layout.
        print("无法读取该坐席的余票信息")
        return 0

    if ticket_num == '有票':
        return 1
    try:
        remaining = int(ticket_num)
    except ValueError:
        # Any other non-numeric status is treated as "not enough".
        remaining = 0
    if remaining >= person_num:
        return 1
    print("余票不够,请重新选择!")
    return 0



def qiang_ticket(train_id, seat_type="硬卧", person="XXX"):
    """Poll the search page until a ticket shows up, then book it.

    The search page is refreshed in a loop.  As soon as the cell for
    *seat_type* on the chosen train no longer reads "无" and the booking
    link can be clicked, the passenger named *person* is ticked, the order
    is submitted and confirmed, and an SMS is sent through ``send_message``.

    Args:
        train_id: Row id of the train, in the form ``"ticket_<id>"``.
        seat_type: Seat class to watch; must be a key of the id table
            below.  Defaults to the previously hard-coded "硬卧".
        person: Name of the single passenger to book for.  Defaults to the
            previously hard-coded placeholder "XXX".

    Returns:
        int: 1 once a ticket has been booked.  The loop does not end otherwise.

    Reads the module-level ``what_ticket`` flag to choose how the student
    dialog is answered.
    """
    count = 0
    web.get("https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc")
    web.find_element_by_id("query_ticket").click()
    web.implicitly_wait(30)
    time.sleep(3)

    # Prefix of the <td> id for each seat class.
    tag_id_list = {
        "特等座": "SWZ",
        "商务座": "SWZ",
        "一等座": "ZY",
        "二等座": "ZE",
        "高级软卧": "GR",
        "软卧": "RW",
        "动卧": "DW",
        "硬卧": "YW",
        "软座": "RZ",
        "硬座": "YZ",
        "无座": "WZ",
        "其他": "QT",
    }
    tag_id = tag_id_list[seat_type] + "_" + train_id.split("_")[1]
    booking_xpath = "//tr[@id = '%s']/td[@align = 'center']/a" % train_id
    # Student tickets answer the student dialog with OK, ordinary ones with Cancel.
    dialog_button = "dialog_xsertcj_ok" if what_ticket else "dialog_xsertcj_cancel"

    while True:
        html = BeautifulSoup(web.page_source, "html.parser")
        cell = html.find("td", {"id": tag_id})
        time.sleep(2)
        # A missing cell (results not rendered yet) counts as "no ticket"
        # instead of raising IndexError.
        if cell is not None and cell.text != "无":
            try:
                web.find_element_by_xpath(booking_xpath).click()
                bookable = True
            except Exception:
                # No clickable booking link yet: keep polling.
                bookable = False
            if bookable:
                order_page = BeautifulSoup(web.page_source, "html.parser")
                for label in order_page.findAll("label"):
                    # The student branch used ``.spilt`` and an undefined
                    # ``xs_person``; both ticket types now match on *person*.
                    if label.text.split("(")[0] == person:
                        try:
                            web.find_element_by_id(label["for"]).click()
                        except Exception:
                            web.find_element_by_id(dialog_button).click()
                web.find_element_by_id("submitOrder_id").click()
                web.find_element_by_id("qr_submit_id").click()
                print("抢票成功,请在30分钟前往12306内付款!!!!!!!")
                try:
                    send_message()
                except Exception as exc:
                    # The ticket is already booked; a failed SMS must not crash.
                    print("短信通知发送失败:", exc)
                return 1
        web.refresh()
        web.find_element_by_id("query_ticket").click()
        count = count + 1
        print("已抢票%s次" % count)
        time.sleep(1)

def send_message():
    """Send the "抢票成功了" text message through the Twilio client.

    The account SID, auth token and both phone numbers are redacted
    placeholders that must be filled in before use.
    """
    credentials = ("信息手动打码", "信息手动打码")  # (account SID, auth token)
    Client(*credentials).messages.create(
        to="信息手动打码",
        from_="信息手动打码",
        body="抢票成功了",
    )


def main():
    """Log in to 12306, search for tickets and walk through the booking.

    Steps: open the login page in Chrome, type the credentials, solve the
    captcha (up to three attempts), collect the trip details, run the
    search, print the result table, pick a train and place the order.

    ``web`` and ``what_ticket`` are published as module-level names because
    every helper in this file reads them as globals.  They used to be
    locals of this function, so the first helper call raised NameError.
    """
    global web, what_ticket
    what_ticket = 0         # 0 = ordinary ticket, 1 = student ticket

    # To prompt for the credentials instead, replace the two literals with
    # input("请输入账号(按回车确认):") and input("请输入密码(按回车确认):").
    username = "12306账号"
    password = "12306密码"
    url = "https://kyfw.12306.cn/otn/resources/login.html"

    web = webdriver.Chrome()
    web.set_page_load_timeout(10)
    web.get(url)
    time.sleep(2)
    web.maximize_window()
    time.sleep(1)
    web.find_element_by_class_name("login-hd-account").click()
    time.sleep(2)
    web.find_element_by_id("J-userName").send_keys(username)
    time.sleep(3)
    web.find_element_by_id("J-password").send_keys(password)
    time.sleep(3)

    for attempt in range(3):
        num = get_num()
        login(num)
        web.find_element_by_id("J-login").click()
        time.sleep(3)
        # Leaving the login URL is taken as proof that the captcha passed.
        if web.current_url != url:
            print("验证成功!!!")
            for position in num:
                print("验证码的位置:", position)
            break
        print("验证失败,等待重新验证!!!!剩余(%s)次" % (2 - attempt))
    else:
        # All three attempts failed.
        print("验证失败,结束程序!!")
        web.quit()
        sys.exit(0)

    print("登录成功!!")

    Ticket_information = get_Ticket_information()
    cookies = get_cookie(Ticket_information)
    tx_ticket(Ticket_information, cookies)
    web.find_element_by_id("query_ticket").click()
    time.sleep(5)
    train_number = print_Ticket()
    time.sleep(3)
    train_id = booking_ticket(train_number)
    buy_ticket(train_id)

# Run the bot only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()

备注:其中对于开始的验证码,使用了一个第三方接口: http://littlebigluo.qicp.net:47720/

程序将验证码图片上传到该接口,然后从接口返回的页面中解析出需要点击的图片位置。

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
首先,为了访问QQ空间相册,我们需要登录QQ账号。可以使用Selenium模拟用户登录QQ,然后使用BeautifulSoup解析相册页面,获取相册中的图片链接。 以下是大致的实现步骤: 1. 安装SeleniumBeautifulSoup模块。 2. 使用Selenium模拟用户登录QQ,进入相册页面。 ```python from selenium import webdriver # 创建Chrome浏览器对象 browser = webdriver.Chrome() # 打开QQ登录页面 browser.get("https://mail.qq.com/") # 执行模拟登录操作,具体实现可以参考Selenium文档或其他相关教程 # ... # 进入相册页面 browser.get("http://user.qzone.qq.com/123456789/album") ``` 3. 使用BeautifulSoup解析相册页面,获取相册中的图片链接。 ```python from bs4 import BeautifulSoup # 获取相册页面的HTML源代码 html = browser.page_source # 使用BeautifulSoup解析HTML源代码,获取相册中的图片链接 soup = BeautifulSoup(html, "html.parser") img_links = soup.find_all("img", class_="c_photo_img_img") # 打印图片链接 for link in img_links: print(link["src"]) ``` 4. 下载图片。 ```python import requests # 下载图片 for i, link in enumerate(img_links): img_url = link["src"] response = requests.get(img_url) with open(f"photo_{i}.jpg", "wb") as f: f.write(response.content) ``` 以上只是大致的实现步骤,具体的实现过程可能还需要根据实际情况进行调整。同时,需要注意的是,访问QQ空间相册需要登录QQ账号,因此需要注意账号安全问题。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值