以编程方式抓取携程电子发票

电子发票通常以 PDF 文件的形式存在，各大电商平台几乎都提供电子发票，如京东、淘宝（天猫）、苏宁易购、携程、中国联通、电信、移动等。那么，我们如何以编程方式抓取这些平台的电子发票呢？这里直接给出代码供参考。实际上，经测试，各大电商平台的电子发票都是可以抓取的。欢迎加 QQ 283335746 共同探讨以编程方式抓取数据信息。

/// <summary>
    /// Fetches electronic invoices (PDF files) for a Ctrip (ctrip.com) account:
    /// pages through the invoice-data endpoint, downloads each invoice PDF and
    /// hands the saved file to <see cref="ThirdPartyBll"/>.
    /// </summary>
    public class Ctripcom
    {
        // Kept private so that instances can only be created with a user id and cookie.
        private Ctripcom() { }

        /// <summary>
        /// Creates a scraper bound to one user and one cookie string.
        /// </summary>
        /// <param name="userId">Id of the local user the invoices are stored for.</param>
        /// <param name="cookieAppend">
        /// Cookie string passed to <see cref="ThirdPartyRequest"/>; the <c>login_uid</c>
        /// entry is also run through <c>Common.CookieFilter</c> to obtain the related user id.
        /// </param>
        public Ctripcom(string userId, string cookieAppend)
        {
            _userId = userId;
            // NOTE(review): presumably CookieFilter returns the login_uid cookie value — confirm against Common.
            _relationUserId = Common.CookieFilter(cookieAppend, LoginUidCookiePattern);
            _thirdPartyRequest = new ThirdPartyRequest(cookieAppend);
            _invoiceBll = new ThirdPartyBll(userId, _relationUserId, ThirdPartyOptions.Ctrip);

            OrderInvoices = new List<CtripOrderInvoiceInfo>();
            _client = new NetClient(BaseUrl);
        }

        private const string BaseUrl = "http://my.ctrip.com";
        private const string OrderInvoicesUrl = "http://my.ctrip.com/einvoice/EInvoice.aspx";
        private const string InvoiceDatasUrl = "http://my.ctrip.com/einvoice/Handle/InvoiceData.aspx";
        private const string DownloadInvoiceUrl = "http://my.ctrip.com/einvoice/DownPDF.aspx?Invinfoid={0}&ts={1}";

        // Regular expression handed to Common.CookieFilter to locate the "login_uid" cookie entry.
        private const string LoginUidCookiePattern = @"\s*login_uid\s*=\s*(.|\n)*?;";

        private readonly string _userId = string.Empty;
        private readonly string _relationUserId = string.Empty;
        private readonly NetClient _client;
        private readonly ThirdPartyRequest _thirdPartyRequest;
        private readonly ThirdPartyBll _invoiceBll;

        // One entry per response page returned by the invoice-data endpoint.
        private List<CtripOrderInvoiceInfo> OrderInvoices { get; set; }

        /// <summary>
        /// Entry point of the invoice processing: collects all order/invoice pages,
        /// keeps the invoices that carry both a code and a number, skips the ones
        /// that are already stored, downloads the remaining PDFs and saves them.
        /// </summary>
        /// <returns>A task that completes when all invoices have been processed.</returns>
        public async Task ExecuteInvoiceAsync()
        {
            if (Log.IsDebugEnabled) Log.Debug("Ctripcom.ExecuteInvoiceAsync 1 is starting,UserId:{0},RelationUserId:{1}", _userId, _relationUserId);

            // Start from a clean slate so that a repeated call does not re-process
            // the pages collected by a previous run.
            OrderInvoices.Clear();

            var request = _thirdPartyRequest.CreateRequest(InvoiceDatasUrl, OrderInvoicesUrl, MethodOptions.Post);

            await GetOrderInvoicesAsync(request, 1, 10);
            if (!OrderInvoices.Any())
            {
                if (Log.IsDebugEnabled) Log.Debug("Ctripcom.ExecuteInvoiceAsync,not any invoice info! cause by: !OrderInvoices.Any() ");
                return;
            }

            // Materialize once: the filter is a deferred query and is both tested and iterated below.
            var invoices = InvoicesFilter(OrderInvoices).ToList();
            if (invoices.Count == 0)
            {
                if (Log.IsDebugEnabled) Log.Debug("Ctripcom.ExecuteInvoiceAsync,not any invoice info! cause by: !invoices.Any()");
                return;
            }

            var oldInvoices = await _invoiceBll.GetUserThirdPartyTInvoices();

            foreach (var item in invoices)
            {
                // Check for an already stored invoice first so that no PDF is downloaded needlessly.
                var alreadyStored = oldInvoices?.Any(m => m.InvoiceCode == item.InvCode && m.InvoiceNumber == item.InvNumber) ?? false;
                if (alreadyStored)
                {
                    continue;
                }

                var filePath = await DownloadInvoiceAsync(item);
                if (string.IsNullOrEmpty(filePath))
                {
                    continue;
                }

                await _invoiceBll.SaveInvoice(filePath, item.BuyerBillID);
            }
        }

        /// <summary>
        /// Collects the order records that contain invoice information, page by page,
        /// and appends every deserialized page to <see cref="OrderInvoices"/>.
        /// </summary>
        /// <param name="request">Request against the invoice-data endpoint; paging parameters are added to it.</param>
        /// <param name="pageIndex">Index of the first page to request.</param>
        /// <param name="pageSize">Number of records requested per page.</param>
        /// <returns>A task that completes when the last page was read or an empty/unparsable response was received.</returns>
        private async Task GetOrderInvoicesAsync(NetRequest request, int pageIndex, int pageSize)
        {
            if (Log.IsDebugEnabled) Log.Debug("Ctripcom.GetOrderInvoicesAsync 2 is starting");

            while (true)
            {
                // NOTE(review): the same request instance is reused for every page. If
                // NetRequest.AddParameter appends instead of replacing, "page"/"pageSize"
                // are sent several times from the second page on — confirm against NetRequest.
                request.AddParameter("page", pageIndex.ToString());
                request.AddParameter("pageSize", pageSize.ToString());

                var response = await _client.ExecuteAsync(request);

                if (Log.IsDebugEnabled) Log.Debug("Ctripcom.GetOrderInvoicesAsync 21,Content:{0}", response.Content);

                if (string.IsNullOrEmpty(response.Content))
                {
                    return;
                }

                var orderInvoiceInfo = JsonConvert.DeserializeObject<CtripOrderInvoiceInfo>(response.Content);
                if (orderInvoiceInfo == null)
                {
                    if (Log.IsDebugEnabled) Log.Debug("Ctripcom.GetOrderInvoicesAsync,orderInvoiceInfo is null! request Resource is : {0}", request.Resource);
                    return;
                }

                if (Log.IsDebugEnabled) Log.Debug("Ctripcom.GetOrderInvoicesAsync 22");

                OrderInvoices.Add(orderInvoiceInfo);

                // Stop once the pages read so far cover the reported total.
                if (orderInvoiceInfo.totalCount <= pageIndex * pageSize)
                {
                    break;
                }

                pageIndex++;
            }
        }

        /// <summary>
        /// Downloads the PDF of one electronic invoice, stores it on disk and
        /// returns the physical path of the stored file.
        /// </summary>
        /// <param name="invoiceInfo">Invoice whose PDF should be downloaded.</param>
        /// <returns>
        /// The file path of the PDF (an existing file is reused without a new download),
        /// or an empty string when the server returned no usable content.
        /// </returns>
        private async Task<string> DownloadInvoiceAsync(CtripInvoiceInfo invoiceInfo)
        {
            if (Log.IsDebugEnabled) Log.Debug("Ctripcom.DownloadInvoiceAsync 3");

            var filePath = FileHelper.GetFilePath("Ctripcom", string.Format(@"{0}-{1}.pdf", invoiceInfo.InvCode, invoiceInfo.InvNumber));
            if (File.Exists(filePath))
            {
                return filePath;
            }

            var sts = SignHelper.GetTimestamp(DateTime.Now);

            // The timestamp is sent twice: in the query string and as the "ts" cookie.
            var request = _thirdPartyRequest.CreateRequest(string.Format(DownloadInvoiceUrl, invoiceInfo.Invinfoid, sts), OrderInvoicesUrl);
            request.AddParameter("ts", sts.ToString(), ParameterType.Cookie);

            var response = await _client.ExecuteAsync(request);

            if (Log.IsDebugEnabled) Log.Debug("Ctripcom.DownloadInvoiceAsync 31,Content:{0},request.Resource:{1}", response.Content, request.Resource);

            // An empty body, a missing byte payload or the literal "exception" all mean: nothing to save.
            if (response.ContentLength == 0 || response.RawBytes == null || response.Content == "exception")
            {
                return string.Empty;
            }

            await FileHelper.Save(response.RawBytes, filePath);

            return filePath;
        }

        /// <summary>
        /// Flattens a set of order/invoice pages into the individual invoices that
        /// carry both an invoice code and an invoice number.
        /// </summary>
        /// <param name="orderInvoices">Order/invoice pages; may be null.</param>
        /// <returns>
        /// A lazily evaluated sequence of invoices; never null. Pages that are null
        /// or have no invoice list are skipped.
        /// </returns>
        private static IEnumerable<CtripInvoiceInfo> InvoicesFilter(IEnumerable<CtripOrderInvoiceInfo> orderInvoices)
        {
            if (orderInvoices == null)
            {
                return Enumerable.Empty<CtripInvoiceInfo>();
            }

            return orderInvoices
                .Where(order => order?.EVatinvoiceinfos != null)
                .SelectMany(order => order.EVatinvoiceinfos)
                .Where(inv => !string.IsNullOrEmpty(inv.InvCode) && !string.IsNullOrEmpty(inv.InvNumber));
        }
    }
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值