以编程方式抓取中国联通电子发票

电子发票通常以PDF文件的形式存在,各大电商几乎都提供电子发票,如京东、淘宝(天猫)、苏宁易购、携程、中国联通、电信、移动等平台。那么,我们如何以编程方式抓取这些平台的电子发票呢?下面直接给出代码供参考。实际上,经测试,各大电商平台的电子发票都是可以抓取的。欢迎加QQ 283335746,共同探讨如何以编程方式抓取数据信息。

/// <summary>
    /// Collects electronic invoices (PDF files) for a China Unicom (10010.com) account:
    /// queries the invoice list endpoint, downloads each invoice PDF and hands the
    /// stored file to <see cref="ThirdPartyBll"/> for persistence.
    /// </summary>
    public class ChinaUnicom
    {
        private ChinaUnicom() { }

        /// <summary>
        /// Creates a fetcher bound to one local user and one China Unicom session cookie.
        /// </summary>
        /// <param name="userId">Identifier of the local user the invoices are saved for.</param>
        /// <param name="cookieAppend">Cookie string of the logged-in 10010.com session; it is passed to the request factory and searched for the <c>_uop_id</c> entry.</param>
        public ChinaUnicom(string userId, string cookieAppend)
        {
            this._userId = userId;
            this._userCookie = cookieAppend;
            // NOTE(review): presumably CookieFilter returns the part of the cookie matched by the pattern (the "_uop_id" entry) - confirm against Common.CookieFilter.
            this._relationUserId = Common.CookieFilter(_userCookie, string.Format(@"\s*{0}\s*=\s*(.|\n)*?;", "_uop_id"));
            this.OrderInvoices = new List<UnicomOrderInvoiceInfo>();
            this._invoiceBll = new ThirdPartyBll(userId, _relationUserId, ThirdPartyOptions.ChinaUnicom);
            this._thirdPartyRequest = new ThirdPartyRequest(cookieAppend);
            this._client = new NetClient(BaseUrl);
        }

        private const string BaseUrl = "http://wap.10010.com";
        // A response whose final URI contains this fragment is treated as "login required".
        private const string LoginPartialUrl = "uac.10010.com/oauth2/new_auth";
        private const string QueryInvoicesRefererUrl = "http://wap.10010.com/mobileService/query/einvoice.htm?navUrlCode=1307&menuId=000200060012";
        private const string QueryInvoicesUrl = "http://wap.10010.com/mobileService/query/einvoicelist.htm";

        private readonly string _userId = string.Empty;
        private readonly string _userCookie = string.Empty;
        private readonly string _relationUserId = string.Empty;
        private readonly NetClient _client;
        private readonly ThirdPartyBll _invoiceBll;
        private readonly ThirdPartyRequest _thirdPartyRequest;

        // Responses accumulated by GetOrderInvoicesAsync during the current run.
        private List<UnicomOrderInvoiceInfo> OrderInvoices { get; set; }

        /// <summary>
        /// Entry point of the invoice processing: queries the invoice list, downloads the
        /// PDF of every invoice that has not been persisted yet and saves it.
        /// </summary>
        /// <returns>A task that completes when all invoices have been processed.</returns>
        /// <exception cref="CustomException">Thrown by the list query when the site redirects to its login page.</exception>
        public async Task ExecuteInvoiceAsync()
        {
            // Drop results of a previous run so that calling this method twice on the
            // same instance does not process stale responses again.
            OrderInvoices.Clear();

            var request = _thirdPartyRequest.CreateRequest(QueryInvoicesUrl, MethodOptions.Post);
            request.AddParameter(Dcr.ContentTypeKey, Dcr.ContentType, ParameterType.HttpHeader);

            // Fetch the orders that carry invoice information (first page, 40 entries).
            await GetOrderInvoicesAsync(request, 1, 40, "record");

            if (!OrderInvoices.Any())
            {
                if (Log.IsDebugEnabled) Log.Debug("ChinaUnicom.ExecuteInvoiceAsync,UserId:{0},RelationUserId:{1},not any invoice info! data from OrderInvoices.", _userId, _relationUserId);
                return;
            }

            // Materialize once: the flattening query is deferred and would otherwise be
            // re-evaluated by both the emptiness check and the loop below.
            var unicomInvoices = ToUnicomInvoices(OrderInvoices).ToList();
            if (!unicomInvoices.Any())
            {
                if (Log.IsDebugEnabled) Log.Debug("ChinaUnicom.ExecuteInvoiceAsync,not any invoice info! data from unicomInvoices.");
                return;
            }

            var oldInvoices = await _invoiceBll.GetUserThirdPartyTInvoices();
            foreach (var item in unicomInvoices)
            {
                // Skip invoices that are already persisted before spending a download on them.
                var alreadySaved = oldInvoices?.Any(m => m.InvoiceCode == item.invoicecode && m.InvoiceNumber == item.invoicenum) == true;
                if (alreadySaved)
                {
                    continue;
                }

                var filePath = await DownloadInvoiceAsync(item);
                if (string.IsNullOrEmpty(filePath))
                {
                    continue;
                }

                await _invoiceBll.SaveInvoice(filePath, string.Empty);
            }
        }

        /// <summary>
        /// Requests one page of orders with invoice information and, when the response
        /// contains any, appends the deserialized result to <see cref="OrderInvoices"/>.
        /// </summary>
        /// <param name="request">Prepared request for the invoice list endpoint; paging parameters are added to it.</param>
        /// <param name="pageIndex">Value sent as the <c>page</c> parameter.</param>
        /// <param name="pageSize">Value sent as the <c>pageSize</c> parameter.</param>
        /// <param name="type">Value sent as the <c>type</c> parameter.</param>
        /// <returns>A task that completes when the response has been processed.</returns>
        /// <exception cref="CustomException">Thrown when the response URI points at the login page.</exception>
        private async Task GetOrderInvoicesAsync(NetRequest request, int pageIndex, int pageSize, string type)
        {
            request.AddParameter("page", pageIndex.ToString());
            request.AddParameter("pageSize", pageSize.ToString());
            request.AddParameter("type", type);

            var response = await _client.ExecuteAsync(request);

            // ResponseUri may be missing (e.g. when no response was received); only a
            // present URI can indicate a redirect to the login page.
            var responseUri = response.ResponseUri?.ToString();
            if (responseUri != null && responseUri.Contains(LoginPartialUrl))
            {
                throw new CustomException(MC.M_ThirdPartySiteLoginNeeding);
            }

            if (string.IsNullOrEmpty(response.Content))
            {
                return;
            }

            var orderInvoiceInfo = JsonConvert.DeserializeObject<UnicomOrderInvoiceInfo>(response.Content);
            if (orderInvoiceInfo?.totalmap == null || !orderInvoiceInfo.totalmap.Any())
            {
                if (Log.IsDebugEnabled) Log.Debug("ChinaUnicom.GetOrderInvoicesAsync,not has invoiceinfo! request Resource is : {0}", request.Resource);
                return;
            }

            OrderInvoices.Add(orderInvoiceInfo);
        }

        /// <summary>
        /// Downloads the PDF of one electronic invoice and stores it, unless a file for
        /// the same invoice code and number already exists.
        /// </summary>
        /// <param name="invoiceInfo">Invoice whose <c>invoiceurl</c> is downloaded; <c>invoicecode</c> and <c>invoicenum</c> form the file name.</param>
        /// <returns>
        /// The path of the stored file, or an empty string when the invoice lacks a code,
        /// number or URL, or when the response is not a non-empty PDF.
        /// </returns>
        private async Task<string> DownloadInvoiceAsync(UnicomInvoiceInfo invoiceInfo)
        {
            if (string.IsNullOrEmpty(invoiceInfo.invoicecode) || string.IsNullOrEmpty(invoiceInfo.invoicenum))
            {
                return string.Empty;
            }

            var filePath = FileHelper.GetFilePath("ChinaUnicom", string.Format(@"{0}-{1}.pdf", invoiceInfo.invoicecode, invoiceInfo.invoicenum));
            if (File.Exists(filePath))
            {
                return filePath;
            }

            // Without a download address there is nothing to request.
            if (string.IsNullOrEmpty(invoiceInfo.invoiceurl))
            {
                return string.Empty;
            }

            // NOTE(review): the second argument looks like the referer for the download - confirm against ThirdPartyRequest.CreateRequest.
            var request = _thirdPartyRequest.CreateRequest(invoiceInfo.invoiceurl, QueryInvoicesUrl);
            var response = await _client.ExecuteAsync(request);
            if (response.ContentLength == 0 || response.ContentType != Dcr.PdfContentType || response.RawBytes == null)
            {
                return string.Empty;
            }

            await FileHelper.Save(response.RawBytes, filePath);

            return filePath;
        }

        /// <summary>
        /// Flattens the order/invoice responses into the plain sequence of invoices they
        /// contain, skipping entries that carry no invoice collection.
        /// </summary>
        /// <param name="unicomOrderInvoices">Deserialized responses of the invoice list endpoint.</param>
        /// <returns>A lazily evaluated sequence of all invoices; empty when the input is null.</returns>
        private static IEnumerable<UnicomInvoiceInfo> ToUnicomInvoices(IEnumerable<UnicomOrderInvoiceInfo> unicomOrderInvoices)
        {
            if (unicomOrderInvoices == null)
            {
                return Enumerable.Empty<UnicomInvoiceInfo>();
            }

            return from order in unicomOrderInvoices
                   where order?.totalmap != null
                   from entry in order.totalmap
                   where entry.value != null
                   from invoice in entry.value
                   select invoice;
        }
    }
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值