简单练手:B站前100爬取

import requests
import xlwt
import re

# Request headers shared by every download; the User-Agent string is that of
# a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
    ),
}

def getHtml(url, timeout=10):
    """Download *url* and return the decoded response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The page text, or ``None`` when the request fails (connection error,
        timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; anything else
        # (e.g. KeyboardInterrupt) is allowed to propagate.
        return None
    # Decode using the charset detected from the body rather than the one
    # taken from the response headers.
    r.encoding = r.apparent_encoding
    return r.text

def get_info(html, mylist):
    """Extract ranking rows from the page markup and append them to *mylist*.

    Each appended row is a list ``[rank, name, author, score]`` of strings,
    taken in document order. The four fields are matched independently and
    paired up with ``zip``, so rows stop at the shortest of the four match
    lists.

    Args:
        html: Page source as text. ``None`` or an empty string yields no rows.
        mylist: List that receives the extracted rows (modified in place).
    """
    if not html:
        # getHtml returns None on failure; there is nothing to parse.
        return

    # Raw strings keep ``\d`` a regex token instead of an invalid string escape.
    ranks = re.findall(
        r'<div class="num">(\d*)</div><div class="content">', html, re.S)
    names = re.findall(
        r'target="_blank" class="title">(.*?)</a><!----><div class="detail">',
        html, re.S)
    authors = re.findall(
        r'<i class="b-icon author"></i>(.*?)</span></a>', html, re.S)
    scores = re.findall(
        r'div class="pts"><div>(\d+)</div>综合得分', html, re.S)

    mylist.extend(list(row) for row in zip(ranks, names, authors, scores))

def writeExel(mylist, path='D:/bilibiliTop100.xls'):
    """Write the ranking rows to an ``.xls`` workbook.

    Row 0 holds the column titles; each entry of *mylist* is written to the
    following rows, one cell per element.

    Args:
        mylist: Iterable of rows, each an iterable of cell values.
        path: Destination file. Defaults to the original hard-coded location.
    """
    header = ['排名', '标题', '作者', '得分']
    book = xlwt.Workbook(encoding='UTF-8')
    sheet = book.add_sheet('Sheet1')

    for col, title in enumerate(header):
        sheet.write(0, col, title)

    # Data starts at row 1, directly below the header row.
    for row_idx, row in enumerate(mylist, start=1):
        for col, value in enumerate(row):
            sheet.write(row_idx, col, value)

    book.save(path)

if __name__ == '__main__':
    # Fetch the ranking page, extract the rows, and save them to a workbook.
    mylist = []
    url = 'https://www.bilibili.com/ranking?'
    html = getHtml(url)
    if html is None:
        # getHtml returns None when the download fails; stop here instead of
        # handing None to the parser.
        raise SystemExit('Failed to download ' + url)
    get_info(html, mylist)
    writeExel(mylist)
    
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是辽宁省行政区划的 SQL 语句: ``` -- 创建省份表 CREATE TABLE province ( id INT PRIMARY KEY, name VARCHAR(20) ); -- 插入省份数据 INSERT INTO province (id, name) VALUES (210000, '辽宁省'); -- 创建市级表 CREATE TABLE city ( id INT PRIMARY KEY, name VARCHAR(20), province_id INT, FOREIGN KEY (province_id) REFERENCES province(id) ); -- 插入市级数据 INSERT INTO city (id, name, province_id) VALUES (210100, '沈阳市', 210000), (210200, '大连市', 210000), (210300, '鞍山市', 210000), (210400, '抚顺市', 210000), (210500, '本溪市', 210000), (210600, '丹东市', 210000), (210700, '锦州市', 210000), (210800, '营口市', 210000), (210900, '阜新市', 210000), (211000, '辽阳市', 210000), (211100, '盘锦市', 210000), (211200, '铁岭市', 210000), (211300, '朝阳市', 210000), (211400, '葫芦岛市', 210000); -- 创建县级表 CREATE TABLE county ( id INT PRIMARY KEY, name VARCHAR(20), city_id INT, FOREIGN KEY (city_id) REFERENCES city(id) ); -- 插入县级数据 INSERT INTO county (id, name, city_id) VALUES (210101, '市辖区', 210100), (210102, '和平区', 210100), (210103, '沈河区', 210100), (210104, '大东区', 210100), (210105, '皇姑区', 210100), (210106, '铁西区', 210100), (210111, '苏家屯区', 210100), (210112, '浑南区', 210100), (210113, '沈北新区', 210100), (210114, '于洪区', 210100), (210115, '辽中区', 210100), (210123, '康平县', 210100), (210124, '法库县', 210100), (210181, '新民市', 210100), (210201, '市辖区', 210200), (210202, '中山区', 210200), (210203, '西岗区', 210200), (210204, '沙河口区', 210200), (210211, '甘井子区', 210200), (210212, '旅顺口区', 210200), (210213, '金州区', 210200), (210214, '普兰店区', 210200), (210224, '长海县', 210200), (210281, '瓦房店市', 210200), (210283, '庄河市', 210200), (210301, '市辖区', 210300), (210302, '铁东区', 210300), (210303, '铁西区', 210300), (210304, '立山区', 210300), (210311, '千山区', 210300), (210321, '台安县', 210300), (210323, '岫岩满族自治县', 210300), (210381, '海城市', 210300), (210401, '市辖区', 210400), (210402, '新抚区', 210400), (210403, '东洲区', 210400), (210404, '望花区', 210400), (210411, '顺城区', 210400), (210421, '抚顺县', 210400), (210422, '新宾满族自治县', 210400), (210423, '清原满族自治县', 210400), (210501, '市辖区', 210500), (210502, '平山区', 210500), (210503, 
'溪湖区', 210500), (210504, '明山区', 210500), (210505, '南芬区', 210500), (210521, '本溪满族自治县', 210500), (210522, '桓仁满族自治县', 210500), (210601, '市辖区', 210600), (210602, '元宝区', 210600), (210603, '振兴区', 210600), (210604, '振安区', 210600), (210624, '宽甸满族自治县', 210600), (210681, '东港市', 210600), (210682, '凤城市', 210600), (210701, '市辖区', 210700), (210702, '古塔区', 210700), (210703, '凌河区', 210700), (210711, '太和区', 210700), (210726, '黑山县', 210700), (210727, '义县', 210700), (210781, '凌海市', 210700), (210782, '北镇市', 210700), (210801, '市辖区', 210800), (210802, '站前区', 210800), (210803, '西市区', 210800), (210804, '鲅鱼圈区', 210800), (210811, '老边区', 210800), (210881, '盖州市', 210800), (210882, '大石桥市', 210800), (210901, '市辖区', 210900), (210902, '海州区', 210900), (210903, '新邱区', 210900), (210904, '太平区', 210900), (210905, '清河门区', 210900), (210911, '细河区', 210900), (210921, '阜新蒙古族自治县', 210900), (210922, '彰武县', 210900), (211001, '市辖区', 211000), (211002, '白塔区', 211000), (211003, '文圣区', 211000), (211004, '宏伟区', 211000), (211005, '弓长岭区', 211000), (211011, '太子河区', 211000), (211021, '辽阳县', 211000), (211081, '灯塔市', 211000), (211101, '市辖区', 211100), (211102, '双台子区', 211100), (211103, '兴隆台区', 211100), (211104, '大洼区', 211100), (211122, '盘山县', 211100), (211201, '市辖区', 211200), (211202, '银州区', 211200), (211204, '清河区', 211200), (211221, '铁岭县', 211200), (211223, '西丰县', 211200), (211224, '昌图县', 211200), (211281, '调兵山市', 211200), (211282, '开原市', 211200), (211301, '市辖区', 211300), (211302, '双塔区', 211300), (211303, '龙城区', 211300), (211321, '朝阳县', 211300), (211322, '建平县', 211300), (211324, '喀喇沁左翼蒙古族自治县', 211300), (211381, '北票市', 211300), (211382, '凌源市', 211300), (211401, '市辖区', 211400), (211402, '连山区', 211400), (211403, '龙港区', 211400), (211404, '南票区', 211400), (211421, '绥中县', 211400), (211422, '建昌县', 211400), (211481, '兴城市', 211400); ```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值