import requests #引入相关操作库
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the list table found at `url` and save its rows to a CSV file.
#
# Each <tr> inside the page's first <tbody> is read as one record; the text
# of its first six <td> cells is collected column by column, assembled into
# a DataFrame and written to disk.

# One list per output column, filled in table-row order.
ranks = []
names = []
name_englishs = []
fortunes = []
sources = []
areas = []

url = 'http://www.forbeschina.com/lists/1733'  # page that holds the list table

# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as data.
res.raise_for_status()
# When the server sends no charset, requests falls back to ISO-8859-1, which
# garbles Chinese text; use the encoding detected from the body instead.
if res.encoding is None or res.encoding.lower() == 'iso-8859-1':
    res.encoding = res.apparent_encoding

soup = BeautifulSoup(res.text, 'html.parser')  # parse the HTML with bs4

# find() returns None when the tag is absent; report that clearly rather
# than crashing with an AttributeError on the chained call.
tbody = soup.find('tbody')
if tbody is None:
    raise ValueError('no <tbody> element found at ' + url)
items = tbody.find_all('tr')  # every table row

for item in items:  # collect the cell texts of each row
    content = item.find_all('td')
    if len(content) < 6:
        # Not a full data row (e.g. a header or spacer row): skip it so the
        # column lists stay aligned and no IndexError is raised.
        continue
    rank, name, name_english, fortune, source, area = (
        cell.get_text() for cell in content[:6]
    )
    ranks.append(rank)
    names.append(name)
    name_englishs.append(name_english)
    fortunes.append(fortune)
    sources.append(source)
    areas.append(area)

df = pd.DataFrame({  # build the DataFrame, one column per collected list
    '排名': ranks,
    '姓名': names,
    '姓名(英文)': name_englishs,
    '财富(亿美元)': fortunes,
    '财富来源': sources,
    '国家和地区': areas
})
# Save to CSV using the gb18030 encoding, without the index column.
df.to_csv('全球亿万富豪榜.csv', encoding='gb18030', index=False)
# 10-01