import requests
from lxml import etree
def getHtml(html, timeout=10):
    """Download a page and return its body decoded as UTF-8.

    Args:
        html: URL of the page to fetch. (The parameter keeps its original
            name for backward compatibility even though it holds a URL.)
        timeout: Value forwarded to ``requests.get(..., timeout=...)``;
            seconds to wait for the server before giving up.

    Returns:
        The response body as text, or ``None`` when the request fails.
        On failure a short diagnostic is printed instead of raising.
    """
    url = html
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/80.0.3987.122 Safari/537.36"
        ),
    }
    try:
        # Without an explicit timeout requests waits indefinitely, so the
        # ReadTimeout handler below could never fire.
        response = requests.get(url, headers=headers, timeout=timeout)
    # The exception classes are referenced through the requests package:
    # the bare names were never imported (NameError at handling time), and
    # the builtin ConnectionError is not the one requests raises.
    except requests.exceptions.ReadTimeout:
        print("time out")
    except requests.exceptions.ConnectionError:
        print("connection error")
    except requests.exceptions.RequestException:
        print("request error")
    else:
        # Force UTF-8 decoding rather than relying on the guessed encoding.
        response.encoding = "utf-8"
        return response.text
    return None
def getInformation(html, max_rows=20, columns=5):
    """Yield the rows of the ``id="top20"`` table as lists of cell texts.

    The text nodes of every ``<td>`` under the table body are collected
    in document order and cut into consecutive groups of ``columns``
    items, one group per table row.

    Args:
        html: HTML source of the page as a string. ``None`` or an empty
            string produces no rows.
        max_rows: Upper bound on the number of rows yielded (default 20).
        columns: Number of text cells that make up one row (default 5).

    Yields:
        Lists of exactly ``columns`` strings. Only complete rows are
        yielded, so fewer than ``max_rows`` rows come back when the page
        holds less data than expected.
    """
    if not html:
        # Nothing was downloaded; there is nothing to parse.
        return
    tree = etree.HTML(html, etree.HTMLParser())
    if tree is None:
        # NOTE(review): lxml may return None for a document without any
        # parsable content - guarded here defensively.
        return
    cells = tree.xpath('//*[@id="top20"]/tbody/tr/td/text()')
    # Never slice past the collected cells: a partial or empty slice would
    # make the caller fail when it indexes the row.
    complete_rows = min(max_rows, len(cells) // columns)
    for row_index in range(complete_rows):
        start = row_index * columns
        yield cells[start:start + columns]
def printInformation(data):
    """Print the ranking rows as a centred, tab-separated table.

    A fixed header line ("2020.3", "2019.3", "编程语言", "评分", "变化率")
    is printed first, followed by one line per row.

    Args:
        data: Iterable of rows. Each row is a sequence whose first five
            items fill the five header columns in order; extra items are
            ignored. Rows with fewer than five items cannot be formatted
            and are skipped instead of raising ``IndexError``.
    """
    tplt = "{0:^10}\t{1:^15}\t{2:^50}\t{3:^10}\t{4:^10}"
    # The template has five fields only; the former sixth argument
    # (chr(12288)) was never referenced and has been dropped.
    print(tplt.format("2020.3", "2019.3", "编程语言", "评分", "变化率"))
    for row in data:
        if len(row) < 5:
            continue
        print(tplt.format(*row[:5]))
def main():
    """Fetch the TIOBE index page and print its top-20 table.

    Downloads the page with ``getHtml``, extracts the rows with
    ``getInformation`` and prints them with ``printInformation``,
    followed by "OK". If no page was obtained (``getHtml`` returned
    ``None``) the function returns without parsing or printing "OK".
    """
    url = "https://www.tiobe.com/tiobe-index/"
    html = getHtml(url)
    if html is None:
        # No page to parse; do not report success.
        return
    result = getInformation(html)
    printInformation(result)
    print("OK")


# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()
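# Web crawler example: the top 20 programming languages in the TIOBE index.
# (Source-page footer: latest recommended article published 2024-08-13 20:17:14.)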