python识别火车票二维码_python实现的一个火车票转让信息采集器

好吧,我承认:晚上看到一张合适的转让票,打电话过去却被告知票已经被别人抢走了,这件事让我相当蛋疼。所以写了个脚本,直接上代码吧。

# coding: utf-8
"""
Collect train-ticket transfer listings during the Spring Festival travel rush.

The script polls the listing pages configured in ``SOURCE`` for the given
train numbers and dates, keeps only sleeper-berth listings that have not been
reported before, mails them to the configured recipients and remembers the
reported URLs in ``RECORD_FILE``.

Author: piglei2007@gmail.com
Date: 2011.01.25
"""
import datetime
import os
import re
import smtplib
import socket
import time
import traceback
import urllib2
import urlparse
from email.mime.text import MIMEText

from BeautifulSoup import BeautifulSoup

# Never let a hanging HTTP/SMTP connection block the cron job forever.
socket.setdefaulttimeout(20)

BLANK_RE = re.compile(r"\s+")

# Install a cookie-aware opener with browser-like headers for every urlopen().
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
opener.addheaders = [
    ("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) Gecko/20090704 Firefox/3.5"),
    ("Accept", "*/*"),
]
urllib2.install_opener(opener)

# Listing-page URL templates, keyed by the site name understood by
# parse_content(); filled in with ``train`` and ``date``.
SOURCE = {
    "58": "http://bj.58.com/huochepiao/Num=%(train)sStartTime=%(date)s00",
    "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
}

# One already-reported listing URL per line.
RECORD_FILE = "/tmp/ticket_records.txt"


def parse_record():
    """Return the set of listing URLs that were already reported.

    If the record file cannot be read (e.g. on the first run), an empty
    record file is created and an empty set is returned.
    """
    try:
        with open(RECORD_FILE, "r") as record_file:
            stripped = (line.strip() for line in record_file)
            # Skip blank lines so an empty string never counts as a record.
            return set(url for url in stripped if url)
    except IOError:
        with open(RECORD_FILE, "w"):
            pass
        return set()


def flush_record(records):
    """Overwrite the record file with ``records``, one URL per line."""
    with open(RECORD_FILE, "w") as record_file:
        record_file.write("\n".join(records))


def main(config):
    """Fetch every configured listing page and mail the new sleeper listings.

    ``config`` is a dict with the keys ``"trains"`` (train numbers),
    ``"dates"`` (dates formatted like ``"20110129"``) and ``"people"``
    (recipient e-mail addresses).
    """
    existed = parse_record()
    to_email = []

    for train in config["trains"]:
        for date in config["dates"]:
            for source, url_template in SOURCE.items():
                page_url = url_template % dict(train=train, date=date)
                try:
                    content = urllib2.urlopen(page_url).read()
                except (urllib2.URLError, socket.error):
                    # One unreachable site must not prevent the remaining
                    # sites from being checked; log it and move on.
                    print(traceback.format_exc())
                    continue

                soup = BeautifulSoup(content)
                for href, text in parse_content(source, soup, train):
                    # Resolve against the real page URL, not the template
                    # that still contains the %(train)s placeholders.
                    link = urlparse.urljoin(page_url, href)
                    # Sleeper berths only!
                    if link not in existed and u"卧" in text:
                        to_email.append([text, link])
                        existed.add(link)

    if to_email:
        # The mail is sent as HTML, so separate the listings with <br>.
        content = "<br>\n".join(
            " | ".join(entry) for entry in to_email
        ).encode("utf-8")
        simple_mail(config["people"], content)
        # Persist only after the mail went out, so a failed send is retried
        # on the next run. Without new listings the record set is unchanged.
        flush_record(existed)


def parse_content(type, soup, train):
    """Extract ``[href, text]`` pairs for ``train`` from a parsed listing page.

    ``type`` selects the site-specific layout (``"58"`` or ``"ganji"``); the
    parameter name is kept for backward compatibility although it shadows the
    builtin. ``soup`` is the BeautifulSoup tree of the page. Unknown types
    yield an empty list.
    """
    result = []

    if type == "58":
        info_table = soup.find("table", id="infolist")
        if info_table is not None:
            # Match the train number unless it is directly followed by
            # "时刻表" (timetable). The original pattern "(!时刻表)" demanded a
            # literal "!" and therefore could never match; a negative
            # lookahead is the presumed intent.
            pattern = re.compile(u"%s(?!时刻表)" % re.escape(train), re.I)
            for matched_text in info_table.findAll("tr", text=pattern):
                # With ``text=`` the matches are text nodes; the parent is
                # presumably the listing's <a> tag - skip it when it carries
                # no href instead of raising.
                anchor = matched_text.parent
                href = anchor.get("href") if anchor is not None else None
                if href is None:
                    continue
                result.append([href, BLANK_RE.sub("", anchor.text)])

    if type == "ganji":
        for item in soup.findAll("dl", {"class": "list_piao"}):
            title = item.dt
            anchor = title.a if title is not None else None
            href = anchor.get("href") if anchor is not None else None
            if href is None:
                continue
            result.append([href, anchor.text])

    return result


# SMTP account used for sending; the values below are placeholders that must
# be replaced with real credentials before running.
EMAIL_HOST = 'smtp.sohu.com'
EMAIL_HOST_USER = 'yourname@sohu.com'
EMAIL_HOST_PASSWORD = 'yourpassword'
EMAIL_PORT = 25


def simple_mail(to, content):
    """Send ``content`` as a UTF-8 HTML mail to the addresses in ``to``.

    ``to`` is a sequence of recipient addresses; ``content`` is the already
    encoded message body. SMTP errors propagate to the caller.
    """
    message = MIMEText(content, 'html', 'UTF-8')
    message['Subject'] = "[%s]有票来啦!!!!" % datetime.datetime.today().isoformat(" ")
    message['From'] = EMAIL_HOST_USER
    message['To'] = ", ".join(to)

    server = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
    try:
        server.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
        server.sendmail(EMAIL_HOST_USER, to, message.as_string())
    finally:
        # Release the connection even when login or sending fails.
        server.close()


def switch_time_zone():
    """Switch the process time zone to Asia/Shanghai."""
    os.environ["TZ"] = "Asia/Shanghai"
    time.tzset()


switch_time_zone()

if __name__ == '__main__':
    config = {
        "trains": ("k471",),
        "dates": ("20110129",),
        "people": (
            "youremail@sohu.com",
        ),
    }

    try:
        main(config)
        print("%s: ok" % datetime.datetime.today())
    except Exception:
        # Top-level boundary for the cron job: report the failure and exit.
        print(traceback.format_exc())

然后把这个脚本加入 cron 定时任务,让它定期运行,你懂的。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值