借鉴C#网页爬虫抓取行政区划,从国家统计局获取了最新行政区域数据。
以下为代码片段:
数据库类:
/// <summary>
/// Plain data holder for one administrative-division record
/// (the surrounding article obtains this data from the National Bureau of Statistics site).
/// Every member is a simple read/write auto-property with no validation.
/// </summary>
public class City {
    /// <summary>Numeric identifier of this record.</summary>
    public decimal ID { get; set; }

    /// <summary>Display name of the division.</summary>
    public string Name { get; set; }

    /// <summary>Code of the division, stored as text.</summary>
    public string Code { get; set; }

    /// <summary>
    /// Level of the division, stored as text.
    /// NOTE(review): presumably the depth in the province/city/county hierarchy - confirm against the code that fills it.
    /// </summary>
    public string Org_Level { get; set; }

    /// <summary>Code of the parent record, stored as text.</summary>
    public string ParentCode { get; set; }

    /// <summary>Numeric identifier of the parent record.</summary>
    public decimal ParentID { get; set; }

    /// <summary>
    /// NOTE(review): presumably the country of the division; the misspelled
    /// member name is kept as-is because renaming it would break existing callers.
    /// </summary>
    public string Contry { get; set; }

    /// <summary>
    /// X location value, stored as text.
    /// NOTE(review): presumably a longitude - confirm units and format.
    /// </summary>
    public string Loc_x { get; set; }

    /// <summary>
    /// Y location value, stored as text.
    /// NOTE(review): presumably a latitude - confirm units and format.
    /// </summary>
    public string Loc_y { get; set; }
}
获取网页帮助类:
public class HttpHelper {
private static ILog log = log4net.LogManager.GetLogger(typeof(HttpHelper));
public static string DownloadHtml(string url,Encoding encod) {
string html = string.Empty;
try {
//设置请求参数
HttpWebRequest request = HttpWebRequest.Create(url) as HttpWebRequest;
request.Timeout = 10 * 1000;//10s超时
request.ContentType = "text/html;charset=utf-8";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36";
//获取结果
using(HttpWebResponse resp = request.GetResponse() as HttpWebResponse) {
if(resp.StatusCode != HttpStatusCode.OK) {
log.Fatal(string.Format("抓取{0}地址返回失败,response.StatusCode = {1}",url,resp.StatusCode));
} else {
try {
StreamReader sr = new StreamReader(resp.GetResponseStream(),encod);
html = sr.ReadToEnd();
sr.Close();
} catch(Exception e) {
log.Fatal(string.Format("DownLoadHtml抓取htm