爬虫之爬取“最好大学排名”并以Excel表格形式打印出来

#!usr/bin/python
# -*- coding: utf-8 -*-


import requests
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
import re
from lxml import etree
import pandas

url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"
ret = requests.get(url)
ret.encoding = 'utf-8'
html = etree.HTML(ret.content)

# mes = html.xpath('/html/body/div[3]/div/div[2]/div/div[3]/div/table/tbody/tr//text()')
a1 = []
a2 = []
a3 = []
a4 = []
a5 = []
a6 = []
a7 = []
a8 = []
a9 = []
a10 = []
a11 = []
a12 = []
a13 = []
a14 = []
message = {}

for i in range(1, 550):
    mes0 = html.xpath('//tbody[@class="hidden_zhpm"]//tr[{}]//text()'.format(i))
    print(mes0)
    a1.append(mes0[0])
    a2.append(mes0[1])
    a3.append(mes0[2])
    a4.append(mes0[3])
    a5.append(mes0[4])
    a6.append(mes0[5])
    a7.append(mes0[6])
    a8.append(mes0[7])
    a9.append(mes0[8])
    if len(mes0) >= 10:
        a10.append(mes0[9])
    else:
        a10.append('None')
    if len(mes0) >= 11:
        a11.append(mes0[10])
    else:
        a11.append(None)
    if len(mes0) >= 12:
        a12.append(mes0[11])
    else:
        a12.append('None')
    if len(mes0) >= 13:
        a13.append(mes0[12])
    else:
         a13.append('None')
    if len(mes0) == 14:
        a14.append(mes0[13])
    else:
        a14.append('None')


message['排名'] = a1
message['学校名'] = a2
message['所在地'] = a3
message['总分'] = a4
message['生源质量'] = a5
message['培养结果'] = a6
message['社会声誉'] = a7
message['科研规模'] = a8
message['科研质量'] = a9
message['顶尖成果'] = a10
message['顶尖人才'] = a11
message['科技服务'] = a12
message['成果转化'] = a13
message['学生国际化'] = a14

print(message)

df = pandas.DataFrame(message, columns = ['排名', '学校名', '所在地', '总分', '生源质量', '培养结果', '社会声誉', '科研规模', '科研质量', '顶尖成果', '顶尖人才', '科技服务', '成果转化', '学生国际化'])

df.to_excel('中国最好的学排名.xlsx')

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值