使用Graphviz生成rpm dependency graph

需求如下:
1,项目中有一大型软件系统,该软件系统的软件仓库中有许多rpm软件包

2,该软件系统的软件仓库有不同的版本

3,同一个软件仓库下的同一软件包可能有多个版本

4,同一个软件仓库下的同一版本的软件包可能有多个副本,分放在不同的子目录

5,需要检查指定版本的软件库下所有rpm软件包的依赖关系,要求只检查所有最新版本软件包的依赖关系

6,比较不同软件库之间的软件包差异,包括依赖关系的差异。

Graphviz能帮助解决这个问题。Graphviz提供了很多接口和工具,其中的dot工具正好符合需求。


主要思路如下:

1, 通过rpm命令查询package的元信息,并存入txt文件。包括package的provides、requires、name、version、release等信息

2, 将txt转化为dot格式

3, 将dot格式输出为可读的pdf文件,也可以输出其他诸如png等格式文件


下面是python代码实现(目前不包括不同版本的软件仓库之间的差异比较),同时支持python 2.7 和 3.2:

import os
import time
import sys
import subprocess
import shutil
import re
import getopt
import datetime
import threading
import locale
import stat

# Default locale of the process; ENCODING is used to decode subprocess output.
LANGUAGE, ENCODING = locale.getdefaultlocale()
if not ENCODING:
    # getdefaultlocale() may return None for the encoding when it cannot be
    # determined; fall back so that bytes.decode(ENCODING) keeps working.
    ENCODING = locale.getpreferredencoding(False) or "utf-8"

# Compare package1 to package2 to check whether package1 is newer than package2.
# Each package has a version string and a release string:
#   version format: a.b.c.d or a.b.c
#   release format: xx_xxxxxxxx or xx
def compare(ver1, rls1, ver2, rls2):
    """Return a positive, zero or negative number ordering two packages.

    A positive result means (ver1, rls1) is newer than (ver2, rls2), a
    negative result means it is older, and zero means no difference was
    detected.

    ver1, ver2: dot-separated numeric version strings.
    rls1, rls2: release strings, either "xx" or "xx_xxxxxxxx" (numeric).

    Raises ValueError if a compared field is not an integer.
    """
    if ver1 == ver2:
        token1 = rls1.split('_')
        token2 = rls2.split('_')
        # The part after '_' decides only when both releases carry one;
        # otherwise fall back to the leading number so that mixed formats
        # do not raise IndexError.
        if len(token1) > 1 and len(token2) > 1:
            return int(token1[1]) - int(token2[1])
        return int(token1[0]) - int(token2[0])

    token1 = ver1.split('.')
    token2 = ver2.split('.')
    # zip() stops at the shorter version, so versions with a different
    # number of fields (a.b.c.d vs a.b.c) no longer index out of range.
    for part1, part2 in zip(token1, token2):
        if part1 != part2:
            return int(part1) - int(part2)
    # All shared fields are equal: the version with more fields is newer.
    return len(token1) - len(token2)

def comment(fd):
    """Write the legend node ("notes") of the graph to the open file fd.

    The legend is a single record-shaped node whose label lists the
    meaning of the edge colours and line styles used in the graph.
    "\\l" is written literally into the label text.
    """
    legend_parts = (
        "\\lNOTES for this graph:",
        "\\l1, Green edge package: multi-versioned package,",
        " user could install multiple versions for that package",
        "\\l2, Red edge package:   singleton package, ",
        "old version package will be removed if user install a new version",
        "\\l3, Black edge package: virtual package",
        "\\l4, Solid-line:  line to link a real package to a real package",
        "\\l5, Dot-line:    line to link a virtual package to a real package\\l",
    )
    label = "".join(legend_parts)
    # A 'rank=max' pin for the notes node existed in the original but was
    # left disabled, so none is written here.
    fd.write("    notes [shape=record, label=\"%s\", fontsize=16]\n" % label)

def _list_rpm_files(root):
    """Return the sorted paths of all regular '*.rpm' files below root."""
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if not filename.endswith(".rpm"):
                continue
            path = os.path.join(dirpath, filename)
            # Regular files only; symbolic links are skipped.
            if os.path.isfile(path) and not os.path.islink(path):
                found.append(path)
    found.sort()
    return found


def _rpm_query(path, *options):
    """Run 'rpm -qp <options> <path>' without a shell; return decoded output."""
    output = subprocess.check_output(["rpm", "-qp"] + list(options) + [path])
    return output.decode(ENCODING or "utf-8")


def _capability_names(output):
    """Return the first field of each rpm capability line, minus rpmlib() ones."""
    names = []
    for line in output.split('\n'):
        if "rpmlib(" in line:
            continue
        # Drop any version constraint ("name >= 1.0" -> "name").
        capability = line.split(" ", 1)[0].strip()
        if capability:
            names.append(capability)
    return names


def rpmgragh(spin, graph, debug=False):
    """Generate a dependency graph for the newest rpm packages below spin.

    Three files named after the spin directory are (re)created in graph:
    <name>.txt (the graph description), <name>.dot (laid out by dot) and
    <name>.pdf (rendered by dot).

    spin:  path of spin (directory that is searched for *.rpm files)
    graph: output directory for graph document
    debug: when true, also record each package's sub-directory in the
           comment line written for it

    Returns 0 on success.
    Raises SystemExit if spin or graph is missing or not a directory, and
    subprocess.CalledProcessError if rpm or dot exits with an error.
    """
    if not os.path.exists(spin):
        raise SystemExit("Spin:'%s' is not accessible." % spin)
    if not os.path.isdir(spin):
        raise SystemExit("Spin:'%s' is not a directory." % spin)

    # abspath() also strips a trailing '/', so basename() yields the name of
    # the spin directory itself.
    spin = os.path.abspath(spin)
    spinName = os.path.basename(spin)

    if not os.path.exists(graph):
        raise SystemExit("Path:'%s' is not accessible." % graph)
    if not os.path.isdir(graph):
        raise SystemExit("Path:'%s' is not a directory." % graph)

    txt = os.path.join(graph, "%s.txt" % spinName)
    dot = os.path.join(graph, "%s.dot" % spinName)
    pdf = os.path.join(graph, "%s.pdf" % spinName)
    # Remove output left over from a previous run.
    for stale in (txt, dot, pdf):
        if os.path.exists(stale):
            os.remove(stale)

    print("Creating a file %s " % pdf)
    print("Generation started ...")

    # The same rpm may be shipped in multiple repos (sub-directories) and in
    # multiple versions; keep only the newest one per package name.
    # map: name --> {version, release, path}
    maps = {}
    for path in _list_rpm_files(spin):
        fields = _rpm_query(
            path, "--queryformat", "%{Name}\n%{Version}\n%{Release}\n"
        ).splitlines()
        name, version, release = [field.strip() for field in fields[:3]]

        known = maps.get(name)
        if known is None or compare(version, release,
                                    known["version"], known["release"]) > 0:
            maps[name] = {"version": version, "release": release, "path": path}

    # 'with' guarantees the metadata file is closed even if a query fails.
    with open(txt, 'w') as fd:
        # write the header of the metadata file
        fd.write("digraph XXX {\n")
        fd.write("    rankdir=LR\n")
        fd.write("    //===== Packages:\n")

        # write comments for this graph
        comment(fd)

        for name in sorted(maps):
            package = maps[name]
            version = package["version"]
            release = package["release"]

            # write comments for a package
            if debug:
                # directory of the package, relative to the spin directory
                subdir = os.path.dirname(package["path"])[len(spin):]
                fd.write("    //    0    0 %s %s %s %s\n"
                         % (name, version, release, subdir))
            else:
                fd.write("    //    0    0 %s %s %s\n" % (name, version, release))

            # query installonlypkg attribute
            provides = _capability_names(_rpm_query(package["path"], "--provides"))
            if "installonlypkg" in provides:
                fd.write("    \"%s\" [color=green]\n" % name)
            else:
                fd.write("    \"%s\" [color=red]\n" % name)

            # query the dependencies
            requires = _capability_names(_rpm_query(package["path"], "--requires"))
            for required in requires:
                fd.write("    \"%s\" -> \"%s\"\n" % (name, required))
            if not requires:
                # package without dependencies: pin its node with rank=max
                fd.write("    { rank=max ; \"%s\" }\n" % name)

            # Make sure virtual packages are also listed, as nodes without depends
            for provided in provides:
                if "installonlypkg" not in provided and name not in provided:
                    fd.write("    //    0    0 %s\n" % provided)
                    fd.write("    \"%s\" -> \"%s\" [style=dotted]\n" % (provided, name))

        # write the footer of the metadata file
        fd.write("}\n")

    # convert the format to dot (dot's default output format)
    subprocess.check_call(["dot", "-o" + dot, txt])

    # convert the format to pdf
    subprocess.check_call(["dot", "-Tpdf", "-o" + pdf, dot])

    print("Generation complete")

    return 0

def usage(exe):
    """Print the command-line help text, using exe as the program name."""
    help_lines = (
        'usage: %s arg1 arg2 [debug]' % exe,
        '  arg1: path of spin',
        '  arg2: output directory for graph document',
        '  debug: output debug information for each package',
    )
    for text in help_lines:
        print(text)

if __name__ == '__main__':
    # Script entry point: <path of spin> <output directory> [debug]
    argc = len(sys.argv)
    if argc <= 2:
        # Both positional arguments are mandatory.
        usage(sys.argv[0])
        sys.exit(1)

    dvd, target = sys.argv[1], sys.argv[2]
    # Any third argument, whatever its value, switches debug output on.
    debug = argc >= 4
    rpmgragh(dvd, target, debug)


使用例子如下,其中“omitted”表示被省略的path信息:

[xxx@build1 graph]$ ./rpmgraph.py /omitted/vx20140423093452/ ~/omitted/graph/
Creating a file /omitted/graph/vx20140423093452.pdf 
Generation started ...
Generation complete
[xx@build1 graph]$ 

示例图片:


展开阅读全文

没有更多推荐了,返回首页