Python爬虫小案例

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/zcwforali/article/details/79979765
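'''
    Small crawler example: download the page at
    https://www.panda.tv/cate/lol, extract each entry's name and number with
    regular expressions, and print the entries ranked by that number
    (largest first).
'''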
from urllib import request
import re

class Spider():
    '''
    Scrape one panda.tv category page and print its entries ranked by number.

    The pipeline, driven by do(), is: fetch the HTML, cut it into
    "video-info" fragments, pull a name and a number string out of each
    fragment, sort by the numeric value of that string (largest first) and
    print the ranking.

    Class attributes:
        url           -- page to download.
        rootPattern   -- regex capturing the body of each "video-info" div.
        namePattern   -- regex capturing the name inside one fragment.
        numberPattern -- regex capturing the "video-number" text inside one
                         fragment (presumably a viewer count -- confirm
                         against the live page).
    '''
    url = 'https://www.panda.tv/cate/lol'
    # Raw strings: '\s' in a normal literal is an invalid escape sequence.
    rootPattern = r'<div class="video-info">([\s\S]*?)</div>'
    namePattern = r'</i>([\s\S]*?)</span>'
    numberPattern = r'<span class="video-number">([\s\S]*?)</span>'

    def __fetchContent(self):
        '''
        Download Spider.url and return the response body decoded as UTF-8.
        '''
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Spider.url) as response:
            raw = response.read()
        return str(raw, encoding='utf-8')

    def __analysis(self, htmls):
        '''
        Split the page into entries.

        Returns a list of dicts {'name': [...], 'number': [...]} where each
        value is the (possibly empty) list of regex matches found in one
        "video-info" fragment.
        '''
        return [
            {
                'name': re.findall(Spider.namePattern, fragment),
                'number': re.findall(Spider.numberPattern, fragment),
            }
            for fragment in re.findall(Spider.rootPattern, htmls)
        ]

    def __refine(self, anchors):
        '''
        Reduce each entry to its first name match (stripped) and first number
        match.

        Entries with no name match or no number match are skipped, so one
        malformed fragment cannot abort the whole run with an IndexError.
        Returns a list of {'name': str, 'number': str} dicts.
        '''
        refined = []
        for anchor in anchors:
            if not anchor['name'] or not anchor['number']:
                continue
            refined.append({
                'name': anchor['name'][0].strip(),
                'number': anchor['number'][0],
            })
        return refined

    def __sort(self, anchors):
        '''
        Return the entries as a new list ordered by numeric value, largest
        first.
        '''
        return sorted(anchors, key=self.__sortSeed, reverse=True)

    def __sortSeed(self, anchor):
        '''
        Sort key: the numeric value of anchor['number'] as a float.

        The first decimal number in the text is used, so "1.5万" is read as
        1.5 rather than 1. A "万" (ten thousand) anywhere in the text
        multiplies the value by 10000. Text with no digits yields 0.0, so
        such entries sort last instead of raising ValueError.
        '''
        text = anchor['number']
        match = re.search(r'\d+(?:\.\d+)?', text)
        if match is None:
            return 0.0
        number = float(match.group())
        if '万' in text:
            number *= 10000
        return number

    def __show(self, anchors):
        '''
        Print one line per entry in the form "rank:name---------number",
        with ranks starting at 1.
        '''
        for rank, anchor in enumerate(anchors, start=1):
            print('{}:{}---------{}'.format(rank, anchor['name'], anchor['number']))

    def do(self):
        '''Run the whole pipeline: fetch, parse, refine, sort, print.'''
        htmls = self.__fetchContent()
        anchors = self.__analysis(htmls)
        anchors = self.__refine(anchors)
        anchors = self.__sort(anchors)
        self.__show(anchors)

# Script entry: build a Spider and run its full pipeline via do().
# There is no __main__ guard, so this also runs when the module is imported.
spider = Spider()
spider.do()
阅读更多 登录后自动展开
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页