[Python] Scraping the ActiveMQ admin console with Python

Requirements

The goal is to use Python to scrape the ActiveMQ admin web page and display the information for each message queue.
Since I have only just started learning Python, it took me a whole afternoon to get this working. It only implements the basics and some parts are still quite crude; I will revise it once I understand web scraping better.
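
At its core the script does two things: fetch queues.jsp from the web console over HTTP basic auth with urllib2, and feed the returned HTML into an HTMLParser subclass that picks the queue rows out of the table. A minimal, untested sketch of just the fetch step (the broker address 127.0.0.1 is a placeholder; port 8161 and the admin/admin account are the defaults also used by the full script) looks like this:

# -*- coding: utf-8 -*-
# Minimal fetch of the ActiveMQ queues page with HTTP basic auth (Python 2).
import urllib2

url = "http://127.0.0.1:8161/admin/queues.jsp"   # placeholder broker address
mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
mgr.add_password(None, url, "admin", "admin")    # default console account
opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(mgr))
html = opener.open(url).read()
print html[:200]   # print the start of the page just to confirm the fetch works
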
The full script is below:

Implementation

# -*- coding: utf-8 -*-
"""
Usage:
The script accepts optional arguments, e.g.:

python parsemq.py aaa bbb
    print information only for the queues aaa and bbb

python parsemq.py
    with no arguments, print information for every queue

python parsemq.py -r          loop forever, printing every queue once a second
python parsemq.py -r aaa bbb  loop forever, printing aaa and bbb once a second
"""
import urllib2
import HTMLParser
import os
import sys
import time

# HTML parser that pulls the queue rows out of the admin page
class MyHTMLParser(HTMLParser.HTMLParser):
    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.trflag = False
        self.tdflag = False
        self.tdcount = 0
        self.data = []

    # start tags: only <tr> and <td> are of interest
    def handle_starttag(self, tag, attrs):
        if tag == 'tr':
            self.trflag = True
            self.tdcount = 0
        if self.trflag and tag == 'td':
            self.tdcount += 1
            # only the first five columns matter
            if self.tdcount <= 5:
                self.tdflag = True
            else:
                self.tdflag = False

    # end tags: only <tr> and <td> are of interest
    def handle_endtag(self, tag):
        if tag == "tr":
            self.trflag = False
            self.tdflag = False
            self.printdatas()
            self.data = []
        if tag == 'td' and self.tdflag:
            self.tdflag = False

    # collect the text inside the cells we care about
    def handle_data(self, data):
        if self.tdflag:
            # strip newlines and surrounding whitespace, skip empty nodes
            data1 = data.replace('\r', '').replace('\n', '').strip()
            if data1:
                self.data.append(data1)

    # print one queue's information
    def printdata(self):
        print "~~~~~~~~~~~~~~~~~~~~~"
        print "queue     :", self.data[0]
        print "pending   :", self.data[1]
        print "consumers :", self.data[2]
        print "enqueued  :", self.data[3]
        print "dequeued  :", self.data[4]

    # print a row only when all five columns were collected; if specific
    # queues were requested on the command line, print only those
    def printdatas(self):
        if len(self.data) == 5:
            if len(forcusmq) != 0:
                for forcusdata in forcusmq:
                    if self.data[0] == forcusdata:
                        self.printdata()
                        break
            else:
                self.printdata()

# parse optional command-line arguments: "-r" loops forever,
# any other arguments restrict the output to the named queues
def pararg():
    if len(sys.argv) > 1:
        print sys.argv[1]
        if sys.argv[1] == "-r":
            global isfor
            isfor = 1
            if len(sys.argv) > 2:
                for i in range(2,len(sys.argv)):
                    forcusmq.append(str(sys.argv[i]))
        else:
            for i in range(1,len(sys.argv)):
                forcusmq.append(str(sys.argv[i]))
    if len(forcusmq):
        print "output queue:",
        for i in forcusmq:
            print i,
        print

def getmqip():
    mqip = raw_input("input mq ip:")
    print "mq ip:", mqip
    return mqip

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

forcusmq = []
isfor = 0

pararg()
mqip = getmqip()

user = "admin"
passwd = "admin"
login_url = "http://%s:8161/admin/queues.jsp" % mqip
print "connect to",login_url

# fetch the HTML with urllib2, authenticating against the admin console
passwdmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
passwdmgr.add_password(None, login_url, user, passwd)
httpauth_handler = urllib2.HTTPBasicAuthHandler(passwdmgr)
opener = urllib2.build_opener(httpauth_handler)
urllib2.install_opener(opener)

while True:
    os.system('clear')
    print "__________________",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"____________________"
    print "~~~~~~~~~~~~~~~~~~~~~~~~~begin~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    req = urllib2.Request(login_url)
    res = urllib2.urlopen(req)
    data = res.read()

    myhtml = MyHTMLParser()
    myhtml.feed(data)
    myhtml.close()
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~end~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    if isfor == 0:
        break
    time.sleep(1)

exit()
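
For when I get around to revising this: on Python 3, urllib2 and HTMLParser no longer exist; they became urllib.request and html.parser. Below is an untested sketch of how the same fetch-and-parse logic could look there. QueueTableParser and fetch_queues are just illustrative names; the port, path and admin/admin credentials are the same assumptions as in the script above.

# -*- coding: utf-8 -*-
# Sketch of the same logic on Python 3: fetch queues.jsp with HTTP basic
# auth and print the first five columns of every table row.
import urllib.request
from html.parser import HTMLParser


class QueueTableParser(HTMLParser):
    def __init__(self):
        super(QueueTableParser, self).__init__()
        self.in_row = False
        self.in_cell = False
        self.cell_index = 0
        self.row = []

    def handle_starttag(self, tag, attrs):
        if tag == 'tr':
            self.in_row = True
            self.cell_index = 0
            self.row = []
        elif tag == 'td' and self.in_row:
            self.cell_index += 1
            self.in_cell = self.cell_index <= 5   # first five columns only

    def handle_endtag(self, tag):
        if tag == 'tr' and self.in_row:
            self.in_row = False
            if len(self.row) == 5:
                print("queue: %s  pending: %s  consumers: %s  "
                      "enqueued: %s  dequeued: %s" % tuple(self.row))
        elif tag == 'td':
            self.in_cell = False

    def handle_data(self, data):
        if self.in_cell:
            text = data.strip()
            if text:
                self.row.append(text)


def fetch_queues(host, user='admin', passwd='admin'):
    # same default port and admin/admin account as the Python 2 script above
    url = 'http://%s:8161/admin/queues.jsp' % host
    mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    mgr.add_password(None, url, user, passwd)
    opener = urllib.request.build_opener(urllib.request.HTTPBasicAuthHandler(mgr))
    page = opener.open(url).read().decode('utf-8', 'replace')
    parser = QueueTableParser()
    parser.feed(page)
    parser.close()


if __name__ == '__main__':
    fetch_queues('127.0.0.1')   # replace with the broker IP

Apart from the renamed modules and print() becoming a function, the parsing logic is unchanged.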


Reposted from: https://www.cnblogs.com/JesseTsou/p/10115111.html
