一个超级简单的 Scala 豆瓣爬虫
import com. wzs. spider. Helper
import scala. collection. mutable. ListBuffer
/**
 * Minimal scraper for the Douban "Top 250" movie list page.
 *
 * Pages are fetched through [[Helper.getDocument]]; the returned document is
 * queried with selector strings (presumably a jsoup `Document` — confirm
 * against `Helper`).
 */
object spider {
  // Kept as a `var` so existing code that reassigns the helper keeps compiling.
  var helper = new Helper()

  /** Running record of every film extracted by [[parse]] since start-up. */
  val list: ListBuffer[Film] = new ListBuffer[Film]

  /** Entry point: scrapes the Top 250 landing page and prints each film. */
  def main(args: Array[String]): Unit = {
    val url = "https://movie.douban.com/top250"
    parse(url).foreach(println(_))
  }

  /**
   * Fetches `url` through the helper and returns the document's string form.
   *
   * @param url address of the page to fetch
   * @return `toString` of the document returned by the helper
   */
  def getUrlContent(url: String): String = helper.getDocument(url).toString

  /**
   * Extracts one [[Film]] per `li` found under `ol[class='grid_view']`.
   *
   * The films of this call are returned in a fresh buffer, so calling `parse`
   * repeatedly no longer hands back the results of earlier calls as well.
   * They are additionally appended to [[list]], which therefore keeps
   * accumulating across calls exactly as before.
   *
   * @param url address of the list page to scrape
   * @return the films found on this page, in document order
   */
  def parse(url: String): ListBuffer[Film] = {
    val doc = helper.getDocument(url)
    val items = doc.select("ol[class='grid_view']").select("li")
    val films = new ListBuffer[Film]

    for (i <- 0 until items.size()) {
      val cur = items.get(i)
      val title = cur.select("div.hd>a").text()
      val score = cur.select("span.rating_num").text()
      val starSpans = cur.select("div.star>span")
      // The rating-count text is the last span inside div.star. Guard the
      // empty case: indexing with size() - 1 == -1 would abort the whole scrape.
      val num =
        if (starSpans.size() > 0) starSpans.get(starSpans.size() - 1).text()
        else ""
      val desc = cur.select("div.bd>p.quote>span").text()
      films += new Film(title, score, num, desc)
    }

    list ++= films
    films
  }
}
/**
 * A single scraped film, rendered by `toString` as a small text card.
 *
 * @param name      film title
 * @param score     rating text
 * @param scoreNums rating-count text, printed after the score, tab-separated
 * @param desc      quote / description text
 */
class Film(name: String, score: String, scoreNums: String, desc: String) {

  /**
   * Formats the film as: an 88-dash rule, the name, "score<TAB>scoreNums",
   * the description, and a closing 88-dash rule — each followed by a newline.
   */
  override def toString: String = {
    val rule = "-" * 88
    s"$rule\n$name\n$score\t$scoreNums\n$desc\n$rule\n"
  }
}
效果图