电子发票通常以 PDF 文件的形式存在。各大电商平台几乎都提供电子发票,如京东、淘宝(天猫)、苏宁易购、携程、中国联通、电信、移动等。那么,我们如何以编程方式爬取这些平台的电子发票呢?下面直接给出代码供参考。实际上,经测试,各大电商平台的电子发票都是可以爬取的。欢迎加 QQ 283335746,共同探讨如何以编程方式(爬虫)抓取数据信息。
/// <summary>
/// 京东(jd.com)
/// </summary>
public class Jdcom
{
/// <summary>
/// Parameterless construction is private, so code outside this class
/// must use the (userId, cookieAppend) constructor.
/// </summary>
private Jdcom()
{
}
/// <summary>
/// Builds the collaborators used by this class: a NetClient for
/// <c>BaseUrl</c>, a ThirdPartyBll configured with <c>ThirdPartyOptions.Jd</c>,
/// a ThirdPartyRequest, and an empty invoice list.
/// </summary>
/// <param name="userId">Forwarded unchanged to ThirdPartyBll.</param>
/// <param name="cookieAppend">
/// Cookie text. Passed as-is to ThirdPartyRequest; ThirdPartyBll receives the
/// result of running it through <c>Common.CookieFilter</c> with a "sid" pattern.
/// </param>
public Jdcom(string userId, string cookieAppend)
{
    _client = new NetClient(BaseUrl);

    // Regex text for a "sid = ...;" cookie pair. What CookieFilter does with
    // the match is not visible here — NOTE(review): confirm keep vs. strip.
    var sidPattern = string.Format(@"\s*{0}\s*=\s*(.|\n)*?;", "sid");
    var filteredCookie = Common.CookieFilter(cookieAppend, sidPattern);
    _invoiceBll = new ThirdPartyBll(userId, filteredCookie, ThirdPartyOptions.Jd);

    _thirdPartyRequest = new ThirdPartyRequest(cookieAppend);
    OrderInvoices = new List<DownloadInvoiceInfo>();
}
/// <summary>Base address given to the NetClient created in the constructor.</summary>
private const string BaseUrl = "https://home.jd.com";

/// <summary>Root address of the invoice site (myivc.jd.com).</summary>
private const string BaseInvoiceUrl = "https://myivc.jd.com";

/// <summary>Address requested by ExecuteInvoiceAsync to fetch order invoices.</summary>
private const string OrderInvoicesUrl = "https://myivc.jd.com/fpzz/index.action";

/// <summary>NetClient instance created with <c>BaseUrl</c>.</summary>
private readonly NetClient _client;

/// <summary>Business-layer object that ExecuteInvoiceAsync calls to save the invoices.</summary>
private readonly ThirdPartyBll _invoiceBll;

/// <summary>Request factory created from the caller-supplied cookie text.</summary>
private readonly ThirdPartyRequest _thirdPartyRequest;

/// <summary>Invoice records that ExecuteInvoiceAsync hands to SaveDownloadInvoice.</summary>
private List<DownloadInvoiceInfo> OrderInvoices;
/// <summary>
/// Entry point for invoice processing: runs FromHomeToInvoice, requests the
/// order-invoice page, and saves the invoice list through the business layer.
/// </summary>
/// <returns>A task that completes after SaveDownloadInvoice has been awaited.</returns>
public async Task ExecuteInvoiceAsync()
{
    if (Log.IsDebugEnabled)
    {
        Log.Debug("Jdcom.ExecuteInvoiceAsync is starting--------------------------");
    }

    await FromHomeToInvoice();

    // Download and parse the order invoice information.
    // NOTE(review): the literal 1 is presumably the starting page index — confirm
    // against GetOrderInvoicesAsync.
    var invoicePageRequest = _thirdPartyRequest.CreateRequest(OrderInvoicesUrl);
    await GetOrderInvoicesAsync(invoicePageRequest, 1);

    // Save the resulting set of invoice records.
    await _invoiceBll.SaveDownloadInvoice(OrderInvoices);
}
/// <summary>
/// 获取包含发票相关信息的订单集
/// </summary>
/// <param name="request"></param>
/// <param name