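# - Import modules
# - Set the display row/column limits
# - Read the table data from the web page
# - Set the table header
# - Drop the leftover header rows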
import pandas as pd
# import numpy as np
# Limit how much of a DataFrame pandas prints: at most 10 columns and 10 rows.
# The full option names are spelled out; abbreviated names are only resolved
# by pattern matching and can become ambiguous when pandas adds new options.
pd.set_option("display.max_columns", 10)
pd.set_option("display.max_rows", 10)
url = "https://nba.hupu.com/standings"
# read_html returns a list of DataFrames, one per table found on the page.
res = pd.read_html(url)
# print(res)
# print(res[0])
res = res[0]
# Use the second row (index label 1) as the column header.
res.columns = res.loc[1]
# Alternatively, set the header labels by hand:
# res.columns = ["1", "2", "2", "2", "2", "2", "2", "2",
# "2", "2", "2", "2", "2", "2"]
# Drop the first two rows (labels 0 and 1); they are not real data now that
# row 1 has been promoted to the header.
res.drop([0, 1], inplace=True)
# res.drop(1, inplace=True)
# res = res.groupby("连胜/负")
# Preview the first rows of the cleaned table.
print(res.head())
# Print the groups (only meaningful after the groupby above).
# print(res.groups)
# Print the number of groups.
# print(len(res.groups))
# Print the number of members in each group.
# print(res.size())
# Sort the data.
# print(res.sort_values(ascending=False))
# Because the header text was read in as an ordinary data row, the columns
# hold strings, and a plain sort would order them lexicographically
# (e.g. "98.7" above "112.3"). Sort on the numeric value instead; cells that
# are not numbers become NaN and are placed last. The printed values
# themselves are left untouched.
print(res.sort_values(
    by=['得分'],
    key=lambda col: pd.to_numeric(col, errors="coerce"),
    ascending=False,
))