import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script forever.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or an HTTP 4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers receive "" when no page is available.
        # Only request failures are swallowed; programming errors and
        # KeyboardInterrupt still propagate.
        return ""
    # Decode with the encoding detected from the body instead of the one
    # declared (or defaulted) by the HTTP headers.
    r.encoding = r.apparent_encoding
    return r.text
def fillProgramList(programInfo, html):
    """Parse the first ``<tbody>`` in *html* and append its rows to *programInfo*.

    Each appended entry is a five-element list holding the ``.string`` of
    the row's cells 0, 1, 3, 4 and 5 (cell 2 is skipped). An entry may
    contain ``None`` where a cell has no single string child.

    Args:
        programInfo: List that is extended in place with one entry per row.
        html: HTML text of the page; may be empty.

    Returns:
        None. Nothing is appended when the page has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to add.
        return
    for tr in tbody.children:
        # Children also include bare text nodes between rows; only tags
        # can be table rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 6:
            # Row lacks the cells read below; skip it instead of raising.
            continue
        programInfo.append([tds[i].string for i in (0, 1, 3, 4, 5)])
def printProgramList(programInfo, num):
    """Print a header line followed by at most *num* rows of *programInfo*.

    Args:
        programInfo: Sequence of rows; the first five items of each row are
            printed as centred, tab-separated columns. ``None`` items are
            printed as empty cells.
        num: Maximum number of rows to print. Fewer rows are printed when
            *programInfo* is shorter; zero or a negative value prints only
            the header.
    """
    row_format = "{:^10}\t{:^10}\t{:^20}\t{:^10}\t{:^10}"
    print(row_format.format("Aug2018", "Aug2017", "Programming Language", "Ratings", "Change"))
    # Slice instead of indexing range(num) so a short (or empty) list does
    # not raise IndexError; clamp at 0 because a negative slice bound would
    # otherwise select rows from the front.
    for p in programInfo[:max(num, 0)]:
        # A None cell cannot be formatted with "{:^10}", so render it empty.
        cells = ["" if cell is None else str(cell) for cell in p[:5]]
        print(row_format.format(*cells))
def main():
    """Fetch the TIOBE index page, collect its table rows and print the first 20."""
    rows = []
    page = getHTMLText('https://www.tiobe.com/tiobe-index/')
    fillProgramList(rows, page)
    # Limit the printed table to 20 entries.
    printProgramList(rows, 20)
# Script entry point: the call is unguarded at module top level, so the
# scraper runs whenever this file is executed or imported.
main()
# Python爬虫(四):编程语言排名爬虫
# 最新推荐文章于 2023-06-19 10:21:34 发布