import re
from urllib
import request
class Spider:
    """Scrape a live-stream category page and print its anchors ranked by viewers.

    The pipeline, driven by :meth:`go`, is: download the page, cut it into
    per-anchor HTML fragments with regular expressions, normalise each
    fragment into a ``{'name': str, 'number': str}`` record, sort the records
    by viewer count (descending) and print them.

    The URL and the three regex patterns are class attributes so that a
    subclass (or an instance) can override them.
    """

    # Shape of the markup the patterns below are written against:
    #   <div class="video-info">anchor name + viewer count</div>
    #   <span class="video-nickname" title="">anchor name</span>
    #   <span class="video-number">1704</span>
    url = 'https://www.panda.tv/cate/lol'
    # Raw strings keep the regex escapes (\s, \S) away from Python's own
    # string-escape processing; the pattern values themselves are unchanged.
    root_pattern = r'<div class="video-info">([\s\S]*?)</div>'
    name_pattern = r'</i>([\s\S]*?)</span>'
    number_pattern = r'<span class="video-number">([\s\S]*?)</span>'

    # NOTE(review): the original file had a stray ``a = 1`` right after
    # ``_analysis``; with indentation lost it is unclear whether it was dead
    # code inside that method or a class attribute. It is kept as a class
    # attribute so ``Spider.a`` keeps working if anything reads it. Nothing in
    # this class uses it.
    a = 1

    # Private method
    def _fetch_content(self):
        """Download ``self.url`` and return the response body decoded as UTF-8."""
        # The context manager closes the HTTP response even if read() raises.
        with request.urlopen(self.url) as response:
            raw = response.read()
        return str(raw, encoding='utf-8')

    # Instance method
    def _analysis(self, htmls):
        """Extract the raw name/number matches for every anchor card in *htmls*.

        Returns a list of ``{'name': [...], 'number': [...]}`` dicts, one per
        ``root_pattern`` match; each value is the list returned by
        ``re.findall`` and may be empty when the inner pattern does not match.
        """
        anchors = []
        for fragment in re.findall(self.root_pattern, htmls):
            anchors.append({
                'name': re.findall(self.name_pattern, fragment),
                'number': re.findall(self.number_pattern, fragment),
            })
        return anchors

    # Private method: data refinement
    def _refine(self, anchors):
        """Lazily yield ``{'name': str, 'number': str}`` records from *anchors*.

        Takes the first match of each field and strips surrounding whitespace.
        Records whose name or number list is empty are skipped instead of
        raising ``IndexError``, so one malformed card cannot abort the run.
        """
        for anchor in anchors:
            names = anchor['name']
            numbers = anchor['number']
            if not names or not numbers:
                continue
            yield {
                'name': names[0].strip(),
                'number': numbers[0].strip(),
            }

    def _sort_seed(self, anchor):
        """Return the viewer count of *anchor* as a float, for use as a sort key.

        The first decimal number in ``anchor['number']`` is parsed (fractional
        part included) and multiplied by 10000 when the text contains the
        character '万'. Text without any digits counts as ``0.0``.
        """
        text = anchor['number']
        # r'\d*' would match the empty string and drop everything after the
        # decimal point ("1.5万" -> 1); require digits and keep the fraction.
        match = re.search(r'\d+(?:\.\d+)?', text)
        number = float(match.group()) if match else 0.0
        if '万' in text:
            number *= 10000
        return number

    # Sort by popularity
    def _sort(self, anchors):
        """Return a new list of *anchors* ordered by viewer count, highest first."""
        return sorted(anchors, key=self._sort_seed, reverse=True)

    # Display the data
    def _show(self, anchors):
        """Print one ``rank N : name number人`` line per anchor, ranks starting at 1."""
        for rank, anchor in enumerate(anchors, start=1):
            print('rank ' + str(rank)
                  + ' : ' + anchor['name']
                  + ' ' + anchor['number']
                  + '人')

    # Entry point: orchestrates the whole pipeline
    def go(self):
        """Fetch the page, parse it, rank the anchors and print the ranking."""
        htmls = self._fetch_content()
        anchors = self._analysis(htmls)
        anchors = list(self._refine(anchors))
        anchors = self._sort(anchors)
        self._show(anchors)
# Instantiate the spider and run the whole fetch -> parse -> rank -> print
# pipeline. This runs at import time and performs a network request.
spoder = Spider()
spoder.go()