Python 爬虫:使用 urllib + 正则表达式

学习使用 urllib 和正则表达式爬取熊猫TV的直播列表,提取每个直播间的主播名称和人气值,并按人气从高到低排序。

from urllib import  request
import re
class Spider():
    """Scrape a Panda TV category page and print its live rooms by popularity.

    The pipeline run by :meth:`start` is: download the page, cut it into
    per-room HTML fragments with regular expressions, extract the streamer
    name and the viewer-count text from each fragment, sort the rooms by
    viewer count in descending order, and print one line per room.
    """

    # Category page that is downloaded.
    url = 'https://www.panda.tv/cate/dota2'
    # Captures the inner HTML of each `<div class="video-info">` block.
    root_pattern = r'<div class="video-info">([\s\S]*?)</div>'
    # Captures the text between a closing `</i>` and the next `</span>`.
    title_pattern = r'</i>([\s\S]*?)</span>'
    # Captures the text inside `<span class="video-number">`.
    number_pattern = r'<span class="video-number">([\s\S]*?)</span>'

    def __fetch_content(self):
        """Download ``self.url`` and return the response body decoded as UTF-8."""
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(self.url) as response:
            return str(response.read(), encoding='utf-8')

    def __analysis(self, htmls):
        """Split the page into room fragments and extract raw matches.

        Returns a list of dicts ``{'title': [...], 'number': [...]}`` where
        each value is the (possibly empty) list of regex captures found in
        one room fragment.
        """
        return [
            {
                'title': re.findall(self.title_pattern, room_html),
                'number': re.findall(self.number_pattern, room_html),
            }
            for room_html in re.findall(self.root_pattern, htmls)
        ]

    def __refine(self, total):
        """Reduce each room to its first title/number capture, stripped.

        Rooms whose fragment yielded no title or no number are skipped
        instead of raising ``IndexError`` and aborting the whole run.
        """
        return [
            {
                'title': each_item['title'][0].strip(),
                'number': each_item['number'][0].strip(),
            }
            for each_item in total
            if each_item['title'] and each_item['number']
        ]

    def __sort(self, refine_total):
        """Return the rooms sorted by numeric popularity, highest first."""
        return sorted(refine_total, key=self.__sort_seed, reverse=True)

    def __sort_seed(self, each_tiem):
        """Convert a room's popularity text into a float used as the sort key.

        The first decimal number in the text is used (so ``'1.5万'`` is read
        as 1.5, not 1), and it is multiplied by 10000 when the text contains
        the character ``万``. Text without any digits yields ``0.0``.
        """
        text = each_tiem['number']
        match = re.search(r'\d+(?:\.\d+)?', text)
        if match is None:
            return 0.0
        number = float(match.group())
        if '万' in text:
            number *= 10000
        return number

    def __show(self, refine_total):
        """Print every room as ``<title>---<number>``, one per line."""
        for room in refine_total:
            print(room['title'] + '---' + room['number'])

    def start(self):
        """Run the full fetch -> parse -> refine -> sort -> print pipeline."""
        htmls = self.__fetch_content()
        total = self.__analysis(htmls)
        refine_total = list(self.__refine(total))
        refine_total = self.__sort(refine_total)
        self.__show(refine_total)

# Script entry: build the crawler and run one fetch -> parse -> sort -> print pass.
# NOTE(review): this executes at module load time and triggers a network request
# via Spider.start(); consider guarding it if the module is ever imported.
spider = Spider()
spider.start()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值