# 网络爬虫案例——TIOBE指数前20名排行开发语言

import requests
from lxml import etree

# Fetch the HTML of a page
def getHtml(html, timeout=10):
    """Download a page and return its body decoded as UTF-8 text.

    Args:
        html: URL of the page to fetch. The parameter name is kept as-is
            so existing callers keep working.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it the request could
            block forever and the time-out branch below could never run.

    Returns:
        The response body as ``str``, or ``None`` when the request failed
        (a short message is printed in that case).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36"
    }
    try:
        response = requests.get(html, headers=headers, timeout=timeout)  # GET request
    # The exception classes live in requests.exceptions; the builtin
    # ConnectionError is a different class and would never match.
    except requests.exceptions.Timeout:
        print("time out")
    except requests.exceptions.ConnectionError:
        print("connection error")
    except requests.exceptions.RequestException:
        print("request error")
    else:
        response.encoding = "utf-8"  # force the encoding used to decode the body
        return response.text
    return None
        
        
def getInformation(html):
    """Yield the rows of the ``top20`` table as lists of five cell texts.

    The text nodes of every ``<td>`` under the element with id ``top20``
    are collected in document order and cut into consecutive groups of
    five. At most 20 groups are produced.

    Args:
        html: HTML source of the page as a string. ``None`` or an empty
            string (e.g. after a failed download) yields nothing.

    Yields:
        Lists of exactly five strings, one list per table row. Trailing
        cells that do not fill a complete row are dropped, so consumers
        can index positions 0-4 safely.
    """
    if not html:
        return
    tree = etree.HTML(html, etree.HTMLParser())
    if tree is None:
        # Nothing parseable in the document.
        return
    cells = tree.xpath('//*[@id="top20"]/tbody/tr/td/text()')

    row_width = 5
    max_rows = 20
    # Only emit rows that are fully present; the page may contain fewer
    # cells than max_rows * row_width.
    complete_rows = min(max_rows, len(cells) // row_width)
    for row_index in range(complete_rows):
        start = row_index * row_width
        yield cells[start:start + row_width]
    
def printInformation(data):
    """Print a header line followed by one formatted line per row.

    Every line has five tab-separated columns, each centred in a fixed
    width (10, 15, 50, 10 and 10 characters).

    Args:
        data: Iterable of indexable rows; positions 0-4 of each row are
            printed. A row with fewer than five items raises
            ``IndexError``.
    """
    render = "{0:^10}\t{1:^15}\t{2:^50}\t{3:^10}\t{4:^10}".format
    header = ("2020.3", "2019.3", "编程语言", "评分", "变化率")
    print(render(*header))
    for row in data:
        # Index explicitly so any indexable row type behaves the same way.
        columns = [row[col] for col in range(5)]
        print(render(*columns))
        
def main():
    """Fetch the TIOBE index page, print the extracted rows, then "OK".

    If the page could not be downloaded (``getHtml`` returned ``None``),
    nothing is parsed or printed and the function returns early.
    """
    url = "https://www.tiobe.com/tiobe-index/"
    html = getHtml(url)
    if html is None:
        # No document to parse; avoid handing None to the HTML parser.
        return
    printInformation(getInformation(html))
    print("OK")
# Run the scraper when this module is executed. There is no __main__ guard,
# so importing the module triggers the download and printing as well.
main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值