Scala 是一种“面向对象的函数式”语言。对于不熟悉函数式编程风格的人来说，适应 Scala 确实需要一段时间，唯一的办法就是多看、多写。以下是用 Scala 结合 HttpClient 实现的一个简单的爬虫小程序。
package com.eric.crawler
import java.io.{BufferedReader, InputStream, InputStreamReader}
import com.eric.Response
import org.apache.http.HttpEntity
import org.apache.http.client.methods.{CloseableHttpResponse, HttpGet}
import org.apache.http.impl.client.{CloseableHttpClient, HttpClients}
import org.apache.http.util.EntityUtils
import scala.io.Source
object Crawler {

  /** HTTP client shared by every call to [[doGet]]; created once when the object is initialised. */
  val httpClient : CloseableHttpClient = HttpClients.createDefault()

  /**
   * Fetches a page with an HTTP GET request and returns its status code and body.
   *
   * The body is decoded with the charset declared in the response's `Content-Type`
   * header; when none is declared, the platform default charset is used. A response
   * that carries no entity (or an entity without content) yields an empty string
   * instead of failing.
   *
   * The response is always closed, even if reading the body throws, so the
   * underlying connection is not leaked.
   *
   * @param url address of the page to fetch
   * @return a [[Response]] built from the status code, the requested url and the page body
   */
  def doGet(url : String) : Response = {
    val httpResponse : CloseableHttpResponse = httpClient.execute(new HttpGet(url))
    try {
      val status : Int = httpResponse.getStatusLine.getStatusCode
      // Used only when the server does not declare a charset in Content-Type.
      val fallbackCharset = java.nio.charset.Charset.defaultCharset()
      // getEntity is null for body-less responses, and EntityUtils.toString returns
      // null when the entity has no content stream; both cases map to "".
      // EntityUtils.toString reads the stream to the end and closes it.
      val pageContent : String = Option(httpResponse.getEntity)
        .flatMap(entity => Option(EntityUtils.toString(entity, fallbackCharset)))
        .getOrElse("")
      new Response(status, url, pageContent)
    } finally {
      // Runs on both the success and the failure path.
      httpResponse.close()
    }
  }

  /**
   * Entry point: fetches a single seed page and prints its content.
   *
   * @param args optional command-line arguments; the first one, if present, is used
   *             as the seed URL, otherwise the built-in placeholder is used
   */
  def main(args : Array[String]) : Unit = {
    val seed : String = args.headOption.getOrElse("这里是网址...")
    val resp : Response = Crawler.doGet(seed)
    println(resp.content)
  }
}