ChromeDriver 追加 HTML:使用 PhantomJS 和 ChromeDriver 添加 Cookies 的方法

namespace ToutiaoSpider
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Threading;
    using Newtonsoft.Json;
    using Newtonsoft.Json.Linq;
    using OpenQA.Selenium;
    using OpenQA.Selenium.PhantomJS;

    /// <summary>
    /// Console crawler that periodically loads a Toutiao article-information endpoint through a
    /// headless browser (PhantomJS), injects a cookie into the session, and stores the resulting
    /// page source as a timestamped HTML file.
    /// </summary>
    class Program
    {
        /// <summary>Directory the fetched pages are written to.</summary>
        private const string OutputDirectory = "C:\\TempFiles\\";

        /// <summary>Delay between two polling rounds, in milliseconds (five minutes).</summary>
        private const int PollIntervalMilliseconds = 5 * 60 * 1000;

        /// <summary>Article-information endpoint that is loaded on every polling round.</summary>
        private const string ArticleInfoUrl = "http://is.snssdk.com/2/article/information/v21/?version_code=6.1.6&app_name=news_article&vid=C5585644-2731-495E-8CF2-B42BBA4D7780&device_id=35980279488&channel=App%20Store&resolution=1125*2001&aid=13&ab_version=120431,134942,136400,126064,122834,130106,126068,128826,134127,136030,137117,136268,137571,126070,136111,116022,135623,125502,137069,125174,135489,133019,137083,126059,137452,135631,136930,122948,137474,137431,31210,133013,135290,131207,114338,133770&ab_feature=z1&openudid=f870822c71509e95ee8f58db8b1d70ce9cb14713&live_sdk_version=1.6.5&idfv=C5585644-2731-495E-8CF2-B42BBA4D7780&ac=WIFI&os_version=10.3.2&ssmix=a&device_platform=iphone&iid=11267657395&ab_client=a1,f2,f7,e1&device_type=iPhone%206S%20Plus&idfa=B1742B5B-DF14-44EF-A325-362873389ABA&aggr_type=1&article_page=0&device_id=35980279488&from_category=news_entertainment&group_id=6411002681368035586";

        /// <summary>
        /// Entry point: ensures the monitoring table exists, then polls forever, saving each
        /// fetched page under <see cref="OutputDirectory"/>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var db = Db.GetDataHelper();
            db.CreateOrUpdateTable(typeof(Models.ArticleMonitorModule));

            while (true)
            {
                try
                {
                    // Fetching happens inside the try block so that a single browser/driver
                    // failure is logged and retried on the next round instead of ending the loop.
                    var src = LoadSource();

                    // CreateDirectory is a no-op when the directory already exists.
                    Directory.CreateDirectory(OutputDirectory);
                    File.WriteAllText(OutputDirectory + DateTime.Now.ToString("yyyy_MM_dd_HH_mm_ss") + ".html", src);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Get item error:" + ex.Message);
                }

                Thread.Sleep(PollIntervalMilliseconds);
            }
        }

        /// <summary>
        /// Parses a page whose text content is the JSON answer of the endpoint and maps every
        /// entry of its <c>data</c> token to a monitoring record.
        /// </summary>
        /// <param name="src">HTML page source wrapping the JSON payload.</param>
        /// <returns>
        /// The parsed records; an empty list when the payload is not a JSON object, its
        /// <c>message</c> is not <c>"success"</c>, or it has no <c>data</c> token.
        /// </returns>
        static List<Models.ArticleMonitorModule> GetItem(string src)
        {
            var lst = new List<Models.ArticleMonitorModule>();

            var docs = new HtmlAgilityPack.HtmlDocument();
            docs.LoadHtml(src);
            var jsonText = docs.DocumentNode.InnerText;

            // A payload that is not a JSON object (or a missing "message") yields an empty result
            // instead of an InvalidCastException / NullReferenceException.
            var json = JsonConvert.DeserializeObject(jsonText) as JObject;
            if (json == null || json["message"]?.Value<string>() != "success")
            {
                return lst;
            }

            var token = json.GetValue("data");
            if (token == null)
            {
                return lst;
            }

            foreach (var data in token)
            {
                // NOTE(review): the generic arguments of Value<T>() were lost in the published
                // listing and ArticleMonitorModule is not visible here; string ids/urls and
                // nullable int counters are assumed - confirm against the model's property types.
                var itemId = data["item_id"]?.Value<string>();
                var url = data["display_url"]?.Value<string>();
                var commentsCount = data["comments_count"]?.Value<int>();
                var goDetailCount = data["go_detail_count"]?.Value<int>();

                lst.Add(new Models.ArticleMonitorModule()
                {
                    __id = Guid.NewGuid(),
                    article_id = itemId,
                    site = "toutiao.com",
                    url = url,
                    comment_count = commentsCount,
                    visit_count = goDetailCount,
                    // Timestamp expressed as UTC+8.
                    fetch_time = DateTime.UtcNow.AddHours(8),
                    like_count = null
                });
            }

            return lst;
        }

        /// <summary>
        /// Creates the headless browser used for fetching, with a 120 second command timeout.
        /// </summary>
        /// <remarks>
        /// ChromeDriver or FirefoxDriver can be substituted by creating the matching
        /// <c>*DriverService</c> / <c>*Options</c> pair and passing the same timeout.
        /// </remarks>
        /// <returns>A new driver instance that the caller must dispose.</returns>
        static IWebDriver CreateDriver()
        {
            var service = PhantomJSDriverService.CreateDefaultService();
            return new PhantomJSDriver(service, new PhantomJSOptions(), TimeSpan.FromSeconds(120));
        }

        /// <summary>
        /// Opens <see cref="ArticleInfoUrl"/>, adds the session cookie for its host, refreshes the
        /// page so the cookie takes effect, and returns the page source.
        /// </summary>
        /// <returns>The page source as reported by the driver after the last attempt.</returns>
        static string LoadSource()
        {
            // The using block guarantees the browser process is shut down even when navigation
            // or PageSource throws.
            using (var driver = CreateDriver())
            {
                var nav = driver.Navigate();
                var cookies = driver.Manage().Cookies;
                var uri = new Uri(ArticleInfoUrl);

                int retryCount = 6;
                while (true)
                {
                    try
                    {
                        // The page has to be loaded first: a cookie can only be added for the
                        // domain the browser is currently on.
                        nav.GoToUrl(ArticleInfoUrl);
                        cookies.AddCookie(new Cookie("csrftoken", "b9e36219cad78dfe6a1c687d6b368b52", uri.DnsSafeHost, "/", DateTime.Now.AddYears(1)));
                        nav.Refresh();

                        Console.WriteLine("OK!" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"));
                        break;
                    }
                    catch (Exception ex)
                    {
                        // Author's note: with PhantomJS, AddCookie always throws although the
                        // cookie is in fact added, so the page is refreshed here as well.
                        try
                        {
                            nav.Refresh();
                        }
                        catch (Exception refreshEx)
                        {
                            // A failing refresh must not abort the remaining retries.
                            Console.WriteLine("refresh failed:" + refreshEx.Message);
                        }

                        Console.WriteLine("retry" + retryCount + ":" + ex.Message);
                        Thread.Sleep(1000);
                        if (retryCount-- <= 0)
                        {
                            break;
                        }
                    }
                }

                var pageSource = driver.PageSource;
                driver.Close();
                return pageSource;
            }
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值