from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from time import sleep
# Scrape a table from a Fortune China page and print four of its columns
# as tab-separated, padded text.

# Browser-like User-Agent sent with the request below.
headers = {
    'User-Agent': ' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36 '}

# Build an explicit Request so the headers above are actually sent, and use a
# context manager so the HTTP response is closed once it has been parsed.
request = Request(
    'http://www.fortunechina.com/fortune500/c/2020-08/10/content_372148.htm',
    headers=headers,
)
with urlopen(request) as html:
    bs = BeautifulSoup(html, 'html.parser')

# Extract the table body that holds the data rows.
wb = bs.find('tbody')
if wb is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of a "'NoneType' object is not iterable" TypeError further down.
    raise RuntimeError('no <tbody> element found in the fetched page')

# Collect the cell texts column by column, then print them one row per line.
list1 = []
list2 = []
list3 = []
list4 = []
# 1-based child position inside a row -> list that receives that child's text.
targets = {4: list1, 6: list2, 8: list3, 10: list4}

# NOTE(review): selection is purely positional. Only every second child of
# <tbody> is treated as a row, and only children 4, 6, 8 and 10 of a row are
# kept. Presumably the odd positions are whitespace text nodes between the
# tags -- confirm against the page markup before changing this.
for ran, row in enumerate(wb, start=1):
    if ran % 2 != 0:
        continue
    for cot, cell in enumerate(row, start=1):
        target = targets.get(cot)
        if target is not None:
            target.append(cell.get_text())

# chr(12288) is the full-width (ideographic) space. It is passed as argument 4
# and used as the fill character of the first two columns so that CJK text
# lines up; the template also emits it as a trailing fifth field.
tplt = "{0:{4}<10}\t{1:{4}^10}\t{2:^10}\t{3:^10}\t{4:^10}"
# Iterate over what was actually scraped instead of a fixed range(0, 500), so
# a table with fewer rows no longer raises IndexError.
for col1, col2, col3, col4 in zip(list1, list2, list3, list4):
    print(tplt.format(col2, col3, col4, col1, chr(12288)))
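# Scraping the Fortune Global 500 (爬取世界五百强)
# Blog-page residue: latest recommended article published 2022-01-16 19:00:37 (最新推荐文章于 2022-01-16 19:00:37 发布)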