电子发票通常以 PDF 文件的形式存在。各大电商及运营商平台几乎都提供电子发票,如京东、淘宝(天猫)、苏宁易购、携程、中国联通、中国电信、中国移动等。那么,我们如何以编程方式爬取这些平台的电子发票呢?这里直接给出代码供参考(以中国联通为例)。实际上,经测试,上述各平台的电子发票都是可以爬取的。欢迎加 QQ 283335746,共同探讨如何以编程方式抓取数据。
/// <summary>
/// China Unicom (10010.com): holds a user's cookie and the request helpers built from it.
/// </summary>
public class ChinaUnicom
{
/// <summary>
/// Parameterless constructor, kept private so that code outside this class
/// has to use the (userId, cookieAppend) overload.
/// </summary>
private ChinaUnicom()
{
}
/// <summary>
/// Creates a China Unicom instance for the given user and cookie text.
/// </summary>
/// <param name="userId">User identifier; stored and passed on to <c>ThirdPartyBll</c>.</param>
/// <param name="cookieAppend">
/// Cookie text; stored, filtered for the <c>_uop_id</c> entry, and passed on to <c>ThirdPartyRequest</c>.
/// </param>
public ChinaUnicom(string userId, string cookieAppend)
{
    // Name of the cookie entry the relation user id is filtered from.
    const string relationCookieName = "_uop_id";

    _userId = userId;
    _userCookie = cookieAppend;

    // Pattern of the form "<name> = <anything, matched lazily>;".
    // NOTE(review): the exact value Common.CookieFilter returns for this pattern is not visible here - confirm.
    string relationCookiePattern = string.Format(@"\s*{0}\s*=\s*(.|\n)*?;", relationCookieName);
    _relationUserId = Common.CookieFilter(_userCookie, relationCookiePattern);

    OrderInvoices = new List<UnicomOrderInvoiceInfo>();

    // The helpers below are built from the values captured above.
    _invoiceBll = new ThirdPartyBll(userId, _relationUserId, ThirdPartyOptions.ChinaUnicom);
    _thirdPartyRequest = new ThirdPartyRequest(cookieAppend);
    _client = new NetClient(BaseUrl);
}
// Base address handed to NetClient by the public constructor.
private const string BaseUrl = "http://wap.10010.com";
// Scheme-less URL fragment on uac.10010.com.
// NOTE(review): its usage is outside this excerpt - presumably matched against responses to detect a login redirect; confirm.
private const string LoginPartialUrl = "uac.10010.com/oauth2/new_auth";
// URL of the e-invoice query page.
// NOTE(review): judging by the name, sent as the Referer header when querying invoices - confirm against the request code.
private const string QueryInvoicesRefererUrl = "http://wap.10010.com/mobileService/query/einvoice.htm?navUrlCode=1307&menuId=000200060012";
private const string