使用 HTTP 访问谷歌的电子数据

728 篇文章 1 订阅
86 篇文章 0 订阅

使用 HTTP 访问谷歌的电子数据

 

通过 HTTP 和 XML 公开数据既简单又与平台无关,因此成为互联网上最流行的数据公开方式之一。只需使用 HTTP 和少量 XML 处理,就可以访问数量大得惊人的数据。访问谷歌的电子数据(即谷歌电子表格,Google Spreadsheets)就是一个很好的应用,清单 11-4 演示了具体做法。

 

注意

我们将要访问的电子表格来自卫报数据商店(Guardian Data Store),它通过谷歌电子表格发布英国以及世界各地的统计数据,在这里可以找到许多有用的资源:

http://www.guardian.co.uk/data-store

 

清单 11-4 使用 HTTP 访问谷歌的电子数据

 

open System

open System.IO

open System.Net

open System.Xml

open System.Xml.XPath

 

// Prefix-to-URI pairs for the XML namespaces that appear in the spreadsheet
// feed; each pair is registered with the namespace manager before querying.
let namespaces =
  [ ("at", "http://www.w3.org/2005/Atom")
    ("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/")
    ("gsx", "http://schemas.google.com/spreadsheets/2006/extended") ]

 

// Runs an XPath query against the document and turns the matches into a
// matrix of strings: one row per node selected by `xpath`, and within each
// row one cell per entry of `columnNames` (each entry is an XPath expression
// evaluated relative to that row's node).
// The prefixes listed in `namespaces` may be used in `xpath` and `columnNames`.
// A column that is missing from a row yields "" instead of raising.
let queryGoogleSpreadSheet (xdoc: XmlDocument) (xpath: string) columnNames =
  let nav = xdoc.CreateNavigator()
  let mngr = new XmlNamespaceManager(new NameTable())
  namespaces |> List.iter (fun (prefix, url) -> mngr.AddNamespace(prefix, url))
  let compiled = nav.Compile(xpath)
  compiled.SetContext(mngr)
  let iter = nav.Select(compiled)
  let getValue (row: XPathNavigator) (nodename: string) =
    // SelectSingleNode returns null when nothing matches the expression
    match row.SelectSingleNode(nodename, mngr) with
    | null -> ""
    | node -> node.Value
  seq { for x in iter do
          // the iterator is untyped, so each item has to be downcast
          let row = x :?> XPathNavigator
          yield Seq.map (getValue row) columnNames }

 

// Downloads the document at `url`, parses it as XML, and returns the values
// of `columnNames` for every node matching "/at:feed/at:entry".
let getGoogleSpreadSheet (url: string) columnNames =
  let req = WebRequest.Create(url)
  use resp = req.GetResponse()
  use stream = resp.GetResponseStream()
  let xdoc = new XmlDocument()
  // the whole document is read into memory here, so the returned rows do not
  // need the response or stream once this function has returned
  xdoc.Load(stream)
  queryGoogleSpreadSheet xdoc "/at:feed/at:entry" columnNames

 

// A location to hold the information we're interested in: a country plus a
// sequence of (readable column name, value) pairs, where the value is None
// when it could not be read as a number.
type Location =
  { Country : string
    NameValuesList : seq<string * float option> }

 

// Builds a Location from one row of cell text. The first cell is the country;
// every remaining cell is parsed as a float (None when it is not a number)
// and paired with the corresponding entry of `names`.
// `row` must contain at least one cell, because Seq.head fails on an empty
// sequence.
let createLocation names row =
  let country = Seq.head row
  let tryParse (s: string) =
    match Double.TryParse(s) with
    | true, res -> Some res
    | false, _ -> None
  let values = row |> Seq.skip 1 |> Seq.map tryParse
  { Country = country
    NameValuesList = Seq.zip names values }

 

// Fetches the spreadsheet at `url` and converts every row into a Location.
// `colNames` pairs each feed column to fetch with its readable name. The
// first pair is the country column, so its readable name is dropped.
let getDataAndProcess url colNames =
  // feed column names to request
  let cols = Seq.map fst colNames
  // readable names for the value columns (everything after the first pair)
  let names = colNames |> Seq.map snd |> Seq.skip 1
  // fetch the rows and create strongly typed records from them
  getGoogleSpreadSheet url cols
  |> Seq.map (createLocation names)

 

// Creates a spreadsheet's feed URL from its key.
let makeUrl (key: string) =
  sprintf "http://spreadsheets.google.com/feeds/list/%s/od6/public/values" key

 

// Downloads the sample spreadsheet and prints one Location record per row.
let main() =
  // the key of the spreadsheet we're interested in
  let sheetKey = "phNtm3LmDZEP61UU2eSN1YA"
  // each feed column name is paired with the readable label to print; the
  // country column comes first and needs no label
  // NOTE(review): the labels say "per 1000" while the feed columns are named
  // "...per10000population" -- confirm which figure is intended
  let cols =
    [ ("gsx:location", "")
      ("gsx:hospitalbedsper10000population", "Hospital beds per 1000")
      ("gsx:nursingandmidwiferypersonneldensityper10000population",
       "Nursing and Midwifery Personnel per 1000") ]
  // get the data
  let data = getDataAndProcess (makeUrl sheetKey) cols
  // print the data
  Seq.iter (printfn "%A") data

do main()


[需要引用 System.Xml.dll]


运行前面的程序,得到如下的结果:

 

...

{Country = "Sweden";

 NameValuesList =

  seq

    [("Hospital beds per 1000", null);

     ("Nursing and Midwifery Personnel per 1000", Some 109.0)];}

{Country = "Switzerland";

 NameValuesList =

  seq

    [("Hospital beds per 1000", Some 57.0);

     ("Nursing and Midwifery Personnel per 1000", Some 110.0)];}

...

 

这个示例值得注意的一点是,我们用来检索数据的方法几乎没有改变。在代码的核心部分,我们发现有几行代码与之前生成 HTTP 请求、检索 XML 文档的代码完全相同:

 

// Excerpt from getGoogleSpreadSheet (`url` is that function's parameter):
// issue the HTTP request and open the response body as a stream
let req = WebRequest.Create(url)

use resp = req.GetResponse()

use stream = resp.GetResponseStream()

// load the response body into an XML document
let xdoc = new XmlDocument()

xdoc.Load(stream)

 

其余的大部分代码都是处理返回的 XML 数据。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值