Python爬虫——爬取B站每日排行数据

本文通过Python爬虫展示了如何获取B站每日排行数据。在学习爬虫的过程中,作者强调了代码规范性和逻辑性的重要性,并表示希望通过分享带来一些启示。
摘要由CSDN通过智能技术生成

啥都先不说,上代码:

import requests, re, json, sqlite3, datetime, time


class BilibiliRank:
    """Fetch the Bilibili ranking list and store it in a per-day SQLite table.

    The table is named ``Rank<YYYYMMDD>`` after the current local date and
    uses ``bvid`` as its primary key.

    Args:
        name: Path of the SQLite database file, passed on to ``MyDB``.
    """

    def __init__(self, name):
        self.set = MyDB(name)
        self.name = name

    @staticmethod
    def _sql_text(value):
        """Return *value* as a single-quoted SQL string literal.

        Embedded single quotes are doubled, which is how SQL escapes them.
        Without this, a title or author name containing ``'`` produces an
        invalid INSERT statement and the row is lost.
        """
        return "'{}'".format(str(value).replace("'", "''"))

    def save(self, data):
        """Create today's ranking table if needed and write every entry.

        Args:
            data: Iterable of mappings, each providing the keys ``mid``,
                ``author``, ``title``, ``bvid`` and ``pts``.

        Raises:
            KeyError: If an entry lacks one of the required keys.
        """
        table = 'Rank{}'.format(str(datetime.date.today()).replace('-', ''))
        columns = ['mid', 'author', 'title', 'bvid', 'pts']
        columns_type = ['Int', 'text', 'text', 'text', 'Int']
        primary_key = 'PRIMARY KEY ({})'.format(columns[3])
        self.set.cDB(table, columns, columns_type, primary_key)
        for each in data:
            # A fresh dict per row, so no value can leak over from the
            # previous entry. Text columns are quoted and escaped here
            # because wDB interpolates values into the SQL verbatim.
            row = {
                columns[0]: each['mid'],
                columns[1]: self._sql_text(each['author']),
                columns[2]: self._sql_text(each['title']),
                columns[3]: self._sql_text(each['bvid']),
                columns[4]: each['pts'],
            }
            self.set.wDB(table, list(row.keys()), list(row.values()))
        print('written successfully')

    def run(self):
        """Download the ranking list with ``GetJson`` and persist it."""
        worker = GetJson()
        data = worker.runing()
        self.save(data)


class GetJson:
    """Download the ranking list from Bilibili's web-interface ranking API.

    The request carries a ``callback`` parameter, so the reply is expected to
    be JSONP: the JSON document wrapped as ``__jp14(...)``.
    """

    def __init__(self):
        self.url = 'https://api.bilibili.com/x/web-interface/ranking'
        # Prefix of the JSONP wrapper; it must match params['callback'] + '('.
        self.deltext = '__jp14('
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
            'Referer': 'https://www.bilibili.com/'}
        # Query string sent with the request.
        # NOTE(review): the meaning of rid/day/type/arc_type is not visible
        # here; confirm against the API before changing them.
        self.params = {
            'rid': '0',
            'day': '1',
            'type': '1',
            'arc_type': '0',
            'jsonp': 'jsonp',
            'callback': '__jp14'}
        # Seconds to wait for the server, so a stalled connection cannot
        # hang the program forever.
        self.timeout = 10

    @staticmethod
    def _unwrap_jsonp(text, prefix):
        """Return the JSON document inside a ``prefix...)`` JSONP wrapper.

        Text that does not start with *prefix* is returned stripped but
        otherwise untouched, so a plain-JSON reply still parses. Only the
        leading prefix and the closing ``)`` (optionally followed by ``;``)
        are removed; occurrences of the prefix inside the payload are kept.
        """
        body = text.strip()
        if body.startswith(prefix):
            body = body[len(prefix):].rstrip().rstrip(';').rstrip()
            if body.endswith(')'):
                body = body[:-1]
        return body

    def runing(self):
        """Request the ranking and return the ``data.list`` array of the reply.

        Raises:
            requests.RequestException: On network failure, timeout or a
                non-success HTTP status.
            ValueError: If the reply body is not valid JSON.
            KeyError: If the reply lacks ``data`` or ``list``.
        """
        response = requests.get(
            self.url,
            headers=self.headers,
            params=self.params,
            timeout=self.timeout,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        payload = json.loads(self._unwrap_jsonp(response.text, self.deltext))
        return payload['data']['list']


class MyDB:
    """Small convenience wrapper around a single SQLite database file.

    Every method opens its own connection to ``self.name`` and closes it
    before returning, even when an exception propagates.

    SQL is assembled with string formatting, so table names, column names and
    values must come from trusted code, and text values must already be
    SQL-quoted by the caller (e.g. ``"'abc'"``).

    ``sqlite3.OperationalError`` raised while executing a statement is printed
    rather than raised; any other exception propagates to the caller.

    Args:
        name: Path of the SQLite database file.
    """

    def __init__(self, name):
        self.name = name

    def _execute(self, command, success_message=None):
        """Run one statement and commit it; return True on success.

        On ``sqlite3.OperationalError`` the reason is printed and False is
        returned. *success_message*, when given, is printed after a
        successful commit.
        """
        conn = sqlite3.connect(self.name)
        try:
            conn.cursor().execute(command)
            conn.commit()
        except sqlite3.OperationalError as reason:
            print(reason)
            return False
        finally:
            # Always release the file handle, including when another error
            # type (e.g. IntegrityError) is on its way up to the caller.
            conn.close()
        if success_message is not None:
            print(success_message)
        return True

    def cDB(self, table='example', columns=('ex_column',), colomns_type=('text',), primary_key=''):
        """Create a table with the given columns plus a ``date`` timestamp column.

        Args:
            table: Name of the table to create.
            columns: Column names.
            colomns_type: SQLite type of each column, in the same order as
                *columns* (the parameter name is kept as-is for callers).
            primary_key: Optional table constraint such as
                ``'PRIMARY KEY (id)'``; empty for none.

        Raises:
            IndexError: If *colomns_type* is shorter than *columns*.
        """
        definitions = [
            "{} {}".format(column, colomns_type[position])
            for position, column in enumerate(columns)
        ]
        # Every table records the local insertion time of each row.
        definitions.append("date timestamp not null default(datetime('now', 'localtime'))")
        # Only add the constraint when there is one; an empty entry would
        # leave a trailing comma and make the statement invalid.
        if primary_key:
            definitions.append(primary_key)
        command = "CREATE TABLE {}({})".format(table, ", ".join(definitions))
        self._execute(command, 'Create TABLE {} successfully'.format(table))

    def wDB(self, table='example', columns=("ex_column",), values=("'ex_data'",)):
        """Insert one row, replacing any existing row with the same key.

        Args:
            table: Name of the target table.
            columns: Column names to fill.
            values: SQL literals for those columns, in the same order; text
                values must already be quoted.
        """
        column_list = ",".join("{}".format(column) for column in columns)
        value_list = ",".join("{}".format(value) for value in values)
        command = "INSERT OR REPLACE INTO '{}'({}) VALUES({})".format(table, column_list, value_list)
        self._execute(command)

    def uDB(self):
        """Placeholder for updating rows through the primary key (not implemented)."""
        pass

    def rDB(self, table='example'):
        """Print every row of *table*, one per line."""
        conn = sqlite3.connect(self.name)
        try:
            for each in conn.cursor().execute("SELECT * FROM {}".format(table)):
                print('{}'.format(each))
        except sqlite3.OperationalError as reason:
            print(reason)
        finally:
            conn.close()

    def dDB(self, table='example'):
        """Drop *table* from the database."""
        self._execute("DROP TABLE '{}'".format(table), 'Delete successfully')


if __name__ == '__main__':
    # Script entry point: scrape the current ranking into bilibili_rank.db,
    # then pause for one second before the process exits.
    BilibiliRank('bilibili_rank.db').run()
    time.sleep(1)

运行结果:
db内容
看了不少博客,学到了很多知识,在这里非常感谢各位大佬的知识分享。不过,翻了这么多博客,感觉大多数人在编写代码的规范性和逻辑性上还是有所欠缺。希望这篇文章能给坚持看完的同学带来一点收获。

评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值