#!/usr/bin/python
# -*- coding: utf-8 -*-
import re

import pandas
import requests
from bs4 import BeautifulSoup
from lxml import etree
from requests.exceptions import RequestException
# Scrape the 2019 university ranking table from zuihaodaxue.cn and export it
# to an Excel workbook, one spreadsheet column per ranking indicator.

RANKING_URL = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"

# Selects every table row inside the ranking body.
ROW_XPATH = '//tbody[@class="hidden_zhpm"]//tr'

# NOTE(review): the file name looks like it is missing the character "大";
# it is kept unchanged so existing consumers of the output still find it.
OUTPUT_PATH = '中国最好的学排名.xlsx'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Spreadsheet headers, in the order the text nodes appear in a table row.
COLUMNS = (
    '排名',        # rank
    '学校名',      # school name
    '所在地',      # location
    '总分',        # total score
    '生源质量',    # student intake quality
    '培养结果',    # education outcome
    '社会声誉',    # social reputation
    '科研规模',    # research scale
    '科研质量',    # research quality
    '顶尖成果',    # top achievements
    '顶尖人才',    # top talent
    '科技服务',    # science and technology service
    '成果转化',    # commercialisation of results
    '学生国际化',  # student internationalisation
)


def normalize_row(cells, width=len(COLUMNS), missing=None):
    """Return *cells* as a list of exactly *width* items.

    Args:
        cells: Text values extracted from one table row.
        width: Number of items the returned list must have.
        missing: Placeholder used for every absent trailing cell.

    Returns:
        A new list: *cells* truncated to *width*, then right-padded with
        *missing*. Extra cells beyond *width* are dropped rather than
        causing the last column to be reported as missing.
    """
    kept = list(cells)[:width]
    return kept + [missing] * (width - len(kept))


def rows_to_columns(rows, columns=COLUMNS):
    """Transpose row-wise cell lists into a ``{column name: values}`` dict.

    Args:
        rows: Iterable of per-row cell lists.
        columns: Column names, in cell order.

    Returns:
        A dict with one key per entry of *columns* (in that order), each
        mapping to a list with one value per non-empty row. Rows that
        contain no cells at all are skipped; short rows are padded with
        ``None`` so every column has the same length.
    """
    table = {name: [] for name in columns}
    for cells in rows:
        if not cells:
            # A row without any text carries no ranking entry.
            continue
        for name, value in zip(columns, normalize_row(cells, len(columns))):
            table[name].append(value)
    return table


def fetch_ranking_rows(page_url=RANKING_URL, timeout=REQUEST_TIMEOUT):
    """Download the ranking page and return the text nodes of each table row.

    Args:
        page_url: Address of the ranking page.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        A list with one entry per ``<tr>`` matched by ``ROW_XPATH``; each
        entry is the list of text nodes found under that row.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-success HTTP status.
    """
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    # The raw bytes are parsed, so the encoding has to be given to the parser;
    # setting ``response.encoding`` would only influence ``response.text``.
    parser = etree.HTMLParser(encoding='utf-8')
    document = etree.HTML(response.content, parser=parser)
    # Select the rows once and query each row relatively, instead of
    # re-running a document-wide XPath for a hard-coded number of indices.
    return [row.xpath('.//text()') for row in document.xpath(ROW_XPATH)]


def main():
    """Fetch the ranking, echo it to stdout and write the Excel workbook."""
    rows = fetch_ranking_rows()
    for cells in rows:
        print(cells)
    message = rows_to_columns(rows)
    print(message)
    df = pandas.DataFrame(message, columns=list(COLUMNS))
    df.to_excel(OUTPUT_PATH)


if __name__ == "__main__":
    main()