网络爬虫案例——TIOBE指数前20名排行开发语言

最新推荐文章于 2024-08-13 20:17:14 发布

戴夫爱吃胡萝卜

最新推荐文章于 2024-08-13 20:17:14 发布

阅读量512

点赞数 1

分类专栏：网络爬虫 文章标签：python

本文链接：https://blog.csdn.net/linbomm/article/details/106028610

版权

网络爬虫专栏收录该内容

4 篇文章 0 订阅

订阅专栏

import requests
from lxml import etree

def getHtml(html, timeout=10):
    """Download a page and return its body decoded as UTF-8 text.

    Args:
        html: URL of the page to fetch. The parameter name is kept as-is for
            backward compatibility with existing callers.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the call indefinitely and
            the timeout branch below could never be reached.

    Returns:
        The response body as a string, or None when the request fails (a
        short diagnostic is printed in that case).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36"
    }
    try:
        response = requests.get(html, headers=headers, timeout=timeout)
    # The exception classes are referenced through the already-imported
    # ``requests`` package; bare names such as ``ReadTimeout`` are not
    # defined in this module and would raise NameError while handling.
    except requests.exceptions.Timeout:
        print("time out")
        return None
    except requests.exceptions.ConnectionError:
        print("connection error")
        return None
    except requests.exceptions.RequestException:
        print("request error")
        return None

    # Force UTF-8 decoding of the body instead of the guessed encoding.
    response.encoding = "utf-8"
    return response.text
        
        
def getInformation(html):
    """Yield the text cells of each row of the table with id "top20".

    The text nodes of every ``td`` under ``//*[@id="top20"]/tbody/tr`` are
    collected into one flat list and then cut into consecutive groups of
    five, one group per table row, for at most 20 rows.

    NOTE(review): the grouping assumes every row contributes exactly five
    text nodes (cells without text, e.g. image-only cells, contribute none)
    -- confirm against the current page layout.

    Args:
        html: HTML source of the page. ``None`` or an empty string (for
            example after a failed download) produces no rows.

    Yields:
        Lists of five strings, one list per table row. Iteration stops at
        the first incomplete row instead of yielding empty or partial lists,
        which would make consumers that index all five fields fail.
    """
    if not html:
        return

    tree = etree.HTML(html, etree.HTMLParser())
    if tree is None:
        return

    cells = tree.xpath('//*[@id="top20"]/tbody/tr/td/text()')
    row_width = 5
    max_rows = 20
    for start in range(0, row_width * max_rows, row_width):
        row = cells[start:start + row_width]
        if len(row) < row_width:
            # Ran out of data: the page holds fewer complete rows than expected.
            break
        yield row
    
def printInformation(data):
    """Print the ranking rows as a centred, tab-separated table.

    A fixed header line is printed first, followed by one line per row.

    Args:
        data: Iterable of rows; the first five items of each row (indices
            0-4) are printed, each centred in its column.
    """
    row_format = "{0:^10}\t{1:^15}\t{2:^50}\t{3:^10}\t{4:^10}"
    header = ("2020.3", "2019.3", "编程语言", "评分", "变化率")
    print(row_format.format(*header))
    for row in data:
        fields = [row[col] for col in range(5)]
        print(row_format.format(*fields))
        
def main():
    """Fetch the TIOBE index page, extract the table rows and print them.

    Prints "OK" once the table has been written out.
    """
    page = getHtml("https://www.tiobe.com/tiobe-index/")
    rows = getInformation(page)
    printInformation(rows)
    print("OK")
# Run the scraper only when executed as a script, so that importing this
# module does not trigger a network request.
if __name__ == "__main__":
    main()